from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
#Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
%matplotlib inline
pd.set_option('display.max_columns', 26)
# Removes the limit for the number of displayed columns
pd.set_option("display.max_columns", None)
# Sets the limit for the number of displayed rows
pd.set_option("display.max_rows", 200)
# setting the precision of floating numbers to 5 decimal points
pd.set_option("display.float_format", lambda x: "%.5f" % x)
# split the data into train and test
from sklearn.model_selection import train_test_split
sns.set()
# to build linear regression_model using statsmodels
import statsmodels.api as sm
# to check model performance
from sklearn.metrics import mean_absolute_error, mean_squared_error
# To ignore unnecessary warnings
import warnings
warnings.filterwarnings("ignore")
#warnings.simplefilter("ignore", ConvergenceWarning)
# To get different metric scores
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
from sklearn import metrics
from sklearn.metrics import roc_curve,auc
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import make_scorer
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from statsmodels.tools.sm_exceptions import ConvergenceWarning
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from statsmodels.tools.sm_exceptions import ConvergenceWarning
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import KFold, StratifiedKFold
from imblearn.over_sampling import SMOTE
from sklearn.metrics import average_precision_score
import sklearn.linear_model
from sklearn.model_selection import GridSearchCV, train_test_split
import pylab as pl
import scipy.optimize as opt
from sklearn import preprocessing
# To access to the dataset
data='/content/drive/MyDrive/Colab Notebooks/bank-full.csv'
# To read the dataset
df=pd.read_csv(data, delimiter=';')
# let us check the first 5 rows of the dataset
df.head()
| age | job | marital | education | default | balance | housing | loan | contact | day | month | duration | campaign | pdays | previous | poutcome | y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 58 | management | married | tertiary | no | 2143 | yes | no | unknown | 5 | may | 261 | 1 | -1 | 0 | unknown | no |
| 1 | 44 | technician | single | secondary | no | 29 | yes | no | unknown | 5 | may | 151 | 1 | -1 | 0 | unknown | no |
| 2 | 33 | entrepreneur | married | secondary | no | 2 | yes | yes | unknown | 5 | may | 76 | 1 | -1 | 0 | unknown | no |
| 3 | 47 | blue-collar | married | unknown | no | 1506 | yes | no | unknown | 5 | may | 92 | 1 | -1 | 0 | unknown | no |
| 4 | 33 | unknown | single | unknown | no | 1 | no | no | unknown | 5 | may | 198 | 1 | -1 | 0 | unknown | no |
# let us check the last 5 rows of the dataset
df.tail()
| age | job | marital | education | default | balance | housing | loan | contact | day | month | duration | campaign | pdays | previous | poutcome | y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 45206 | 51 | technician | married | tertiary | no | 825 | no | no | cellular | 17 | nov | 977 | 3 | -1 | 0 | unknown | yes |
| 45207 | 71 | retired | divorced | primary | no | 1729 | no | no | cellular | 17 | nov | 456 | 2 | -1 | 0 | unknown | yes |
| 45208 | 72 | retired | married | secondary | no | 5715 | no | no | cellular | 17 | nov | 1127 | 5 | 184 | 3 | success | yes |
| 45209 | 57 | blue-collar | married | secondary | no | 668 | no | no | telephone | 17 | nov | 508 | 4 | -1 | 0 | unknown | no |
| 45210 | 37 | entrepreneur | married | secondary | no | 2971 | no | no | cellular | 17 | nov | 361 | 2 | 188 | 11 | other | no |
# Let us check the dataset shape
df.shape
(45211, 17)
#checking duplicated values
df.duplicated().sum()
0
#checking for null values
df.isnull().sum()
age 0 job 0 marital 0 education 0 default 0 balance 0 housing 0 loan 0 contact 0 day 0 month 0 duration 0 campaign 0 pdays 0 previous 0 poutcome 0 y 0 dtype: int64
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 45211 entries, 0 to 45210 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 age 45211 non-null int64 1 job 45211 non-null object 2 marital 45211 non-null object 3 education 45211 non-null object 4 default 45211 non-null object 5 balance 45211 non-null int64 6 housing 45211 non-null object 7 loan 45211 non-null object 8 contact 45211 non-null object 9 day 45211 non-null int64 10 month 45211 non-null object 11 duration 45211 non-null int64 12 campaign 45211 non-null int64 13 pdays 45211 non-null int64 14 previous 45211 non-null int64 15 poutcome 45211 non-null object 16 y 45211 non-null object dtypes: int64(7), object(10) memory usage: 5.9+ MB
# Partition the column names by dtype: object-typed columns are treated as
# categorical, every other column as numerical.
cat_cols = [name for name, kind in df.dtypes.items() if kind == 'object']
num_cols = [name for name, kind in df.dtypes.items() if kind != 'object']

# Print the distinct values found in each categorical column.
for col in cat_cols:
    print(f"{col} has {df[col].unique()} values\n")
job has ['management' 'technician' 'entrepreneur' 'blue-collar' 'unknown' 'retired' 'admin.' 'services' 'self-employed' 'unemployed' 'housemaid' 'student'] values marital has ['married' 'single' 'divorced'] values education has ['tertiary' 'secondary' 'unknown' 'primary'] values default has ['no' 'yes'] values housing has ['yes' 'no'] values loan has ['no' 'yes'] values contact has ['unknown' 'cellular' 'telephone'] values month has ['may' 'jun' 'jul' 'aug' 'oct' 'nov' 'dec' 'jan' 'feb' 'mar' 'apr' 'sep'] values poutcome has ['unknown' 'failure' 'other' 'success'] values y has ['no' 'yes'] values
df.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| age | 45211.00000 | 40.93621 | 10.61876 | 18.00000 | 33.00000 | 39.00000 | 48.00000 | 95.00000 |
| balance | 45211.00000 | 1362.27206 | 3044.76583 | -8019.00000 | 72.00000 | 448.00000 | 1428.00000 | 102127.00000 |
| day | 45211.00000 | 15.80642 | 8.32248 | 1.00000 | 8.00000 | 16.00000 | 21.00000 | 31.00000 |
| duration | 45211.00000 | 258.16308 | 257.52781 | 0.00000 | 103.00000 | 180.00000 | 319.00000 | 4918.00000 |
| campaign | 45211.00000 | 2.76384 | 3.09802 | 1.00000 | 1.00000 | 2.00000 | 3.00000 | 63.00000 |
| pdays | 45211.00000 | 40.19783 | 100.12875 | -1.00000 | -1.00000 | -1.00000 | -1.00000 | 871.00000 |
| previous | 45211.00000 | 0.58032 | 2.30344 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 275.00000 |
df.columns
Index(['age', 'job', 'marital', 'education', 'default', 'balance', 'housing',
'loan', 'contact', 'day', 'month', 'duration', 'campaign', 'pdays',
'previous', 'poutcome', 'y'],
dtype='object')
# defining functions to create plot
def violin(col, target="class"):
    """Show a plotly violin plot (with inner box) of ``col`` per group.

    col: name of the column of the global ``df`` drawn on the y-axis.
    target: name of the column used for the x-axis groups and the colours.
        Defaults to "class" (the previous hard-coded value). The columns
        listed for ``df`` in this notebook do not include "class", so pass
        the label column explicitly, e.g. ``violin("age", target="y")``.

    Returns whatever ``fig.show()`` returns.
    """
    fig = px.violin(
        df, y=col, x=target, color=target, box=True, template='plotly_dark'
    )
    return fig.show()
def kde(col, target="class"):
    """Draw one KDE curve of ``col`` per group on a seaborn FacetGrid.

    col: name of the column of the global ``df`` whose density is drawn.
    target: name of the column used as the grid's ``hue``. Defaults to
        "class" (the previous hard-coded value). The columns listed for
        ``df`` in this notebook do not include "class", so pass the label
        column explicitly, e.g. ``kde("age", target="y")``.
    """
    grid = sns.FacetGrid(df, hue=target, height=6, aspect=2)
    grid.map(sns.kdeplot, col)
    grid.add_legend()
def scatter(col1, col2, target="class"):
    """Show a plotly scatter plot of two columns coloured by group.

    col1: name of the column of the global ``df`` drawn on the x-axis.
    col2: name of the column of the global ``df`` drawn on the y-axis.
    target: name of the column used for the point colours. Defaults to
        "class" (the previous hard-coded value). The columns listed for
        ``df`` in this notebook do not include "class", so pass the label
        column explicitly, e.g. ``scatter("age", "balance", target="y")``.

    Returns whatever ``fig.show()`` returns.
    """
    fig = px.scatter(df, x=col1, y=col2, color=target, template='plotly_dark')
    return fig.show()
def univar_vis(fd):  # Define univariate visualization function
    """Plot four univariate views of a numeric Series on a 2x2 grid.

    fd: pandas Series to visualise; its ``name`` becomes the figure title.

    Panels: KDE curve (top-left), box plot (top-right), violin plot
    (bottom-left) and histogram (bottom-right). Each panel gets vertical
    marker lines at the mean and at the median of ``fd``.
    """
    # An unnamed Series has name None, which has no .upper().
    title = "" if fd.name is None else str(fd.name)
    fig, axes = plt.subplots(2, 2, figsize=(10, 6))
    fig.suptitle(title.upper() + " " + "Distribution")

    # sns.distplot is deprecated; with its histogram switched off it only
    # drew the KDE curve, which kdeplot draws directly.
    sns.kdeplot(x=fd, color="green", ax=axes[0, 0])
    # Pass the data as x= so every panel is laid out along the x-axis,
    # which is what the vertical mean/median lines below assume.
    sns.boxplot(x=fd, ax=axes[0, 1])
    sns.violinplot(x=fd, ax=axes[1, 0])
    sns.histplot(x=fd, ax=axes[1, 1])

    mean_value = fd.mean()
    median_value = fd.median()
    axes[0, 0].axvline(mean_value, color="black", linewidth=0.7)
    axes[0, 0].axvline(median_value, color="red", linewidth=0.3)
    axes[0, 1].axvline(median_value, color="red", linewidth=0.9)
    axes[0, 1].axvline(mean_value, color="purple", linewidth=0.7)
    axes[1, 0].axvline(mean_value, color="purple", linewidth=0.7)
    axes[1, 0].axvline(median_value, color="green", linewidth=0.7)
    axes[1, 1].axvline(mean_value, color="purple", linewidth=0.7)
    axes[1, 1].axvline(median_value, color="green", linewidth=0.7)

    plt.tight_layout()
    plt.show()
# One univariate figure per numerical column, in the original order.
# "duration" was previously drawn twice; it is now drawn once.
for numeric_col in ["age", "balance", "day", "duration", "pdays", "campaign", "previous"]:
    univar_vis(df[numeric_col])
# checking numerical features distribution
plt.figure(figsize=(20, 15))

# 3 x 5 grid; as before, at most the first 14 numerical columns are drawn.
for plotnumber, column in enumerate(num_cols[:14], start=1):
    ax = plt.subplot(3, 5, plotnumber)
    # sns.distplot is deprecated; a density histogram with a KDE overlay is
    # the replacement seaborn recommends for it.
    sns.histplot(x=df[column], kde=True, stat="density", ax=ax)
    plt.xlabel(column)

plt.tight_layout()
plt.show()
Skewness is present in some of the columns.
# looking at categorical columns
plt.figure(figsize=(20, 10))

# 3 x 4 grid; as before, at most the first 11 categorical columns are drawn.
for plotnumber, column in enumerate(cat_cols[:11], start=1):
    ax = plt.subplot(3, 4, plotnumber)
    # Name the axis explicitly: the meaning of a positional argument to
    # countplot differs between seaborn versions.
    sns.countplot(x=df[column], palette='bright', ax=ax)
    plt.xlabel(column)

plt.tight_layout()
plt.show()
# Display columns
df.columns
Index(['age', 'job', 'marital', 'education', 'default', 'balance', 'housing',
'loan', 'contact', 'day', 'month', 'duration', 'campaign', 'pdays',
'previous', 'poutcome', 'y'],
dtype='object')
# looking at unique values in categorical columns
for col in cat_cols:
print(f"{col} has {df[col].unique()} values\n")
job has ['management' 'technician' 'entrepreneur' 'blue-collar' 'unknown' 'retired' 'admin.' 'services' 'self-employed' 'unemployed' 'housemaid' 'student'] values marital has ['married' 'single' 'divorced'] values education has ['tertiary' 'secondary' 'unknown' 'primary'] values default has ['no' 'yes'] values housing has ['yes' 'no'] values loan has ['no' 'yes'] values contact has ['unknown' 'cellular' 'telephone'] values month has ['may' 'jun' 'jul' 'aug' 'oct' 'nov' 'dec' 'jan' 'feb' 'mar' 'apr' 'sep'] values poutcome has ['unknown' 'failure' 'other' 'success'] values y has ['no' 'yes'] values
sns.countplot(y ='job', data=df, hue='y', palette = 'bright') #Job vs y
plt.show()
sns.countplot(y ='marital', data=df, hue='y', palette = 'bright') #marital vs y
plt.show()
Married candidates are the largest group; most of them were not convinced and did not make a deposit. However, they also account for more deposits than single or divorced candidates.
sns.countplot(y ='education', data=df, hue='y', palette = 'bright') #education vs y
plt.show()
Candidates with a secondary education level made the most deposits, followed by candidates with tertiary and then primary education.
sns.countplot(y ='contact', data=df, hue='y', palette = 'bright') #contact vs y
plt.show()
Most of the candidates who were convinced were contacted via cellular.
sns.countplot(y ='poutcome', data=df, hue='y', palette = 'bright') # poutcome vs y
plt.show()
Most deposits come from candidates whose previous campaign outcome (poutcome) is unknown, followed by those with a previous success and then those with a previous failure.
# Box plot of every numerical column split by the target 'y', two per row.
fig, axes = plt.subplots(5, 2, figsize=(12, 15))
flat_axes = axes.flatten()

# The loop variable was previously called `cat_cols`, which replaced the
# global list of categorical column names with a single string.
for idx, num_col in enumerate(num_cols):
    sns.boxplot(y=num_col, data=df, x='y', ax=flat_axes[idx], palette='bright')

# The grid has more slots than there are numerical columns; hide the spare axes.
for spare_ax in flat_axes[len(num_cols):]:
    spare_ax.set_visible(False)

plt.subplots_adjust(hspace=1)
# pair plot showing relationships among variables
sns.pairplot(data=df, hue='y', corner=True)
from time import sleep
from tqdm import tqdm
for i in tqdm (range (10)):
sleep(3)
100%|██████████| 10/10 [00:30<00:00, 3.01s/it]
#heatmap of data
plt.figure(figsize=(15, 10))
# Correlate the numerical columns only: df still contains object columns here,
# and DataFrame.corr() raises on non-numeric data in pandas >= 2.0.
numeric_corr = df.select_dtypes(include="number").corr()
sns.heatmap(numeric_corr, annot=True, linewidths=2, linecolor='lightgrey')
plt.show()
Balance and duration have high correlation compared to the others
ax = sns.catplot(
y="job", x="balance", data=df, kind="bar", height=4.5, aspect=3, palette = 'bright'
)
ax.set_xticklabels(rotation=0).set(
title="JOB TYPE vs. BALANCE");
The average yearly balance of candidates in the management and unknown job categories is similarly high compared to the others, with the exception of retired candidates, who have the highest balance.
ax = sns.catplot(
y="education", x="balance", data=df, kind="bar", height=4.5, aspect=3, palette = 'bright'
)
ax.set_xticklabels(rotation=0).set(
title="EDUCATION vs. BALANCE");
Candidates with tertiary education have a higher average balance than the others.
ax = sns.catplot(
y="marital", x="balance", data=df, kind="bar", height=4.5, aspect=3, palette = 'bright'
)
ax.set_xticklabels(rotation=0).set(
title="MARITAL vs. BALANCE");
Married candidates have highest average yearly balance compared to single and divorced candidates
ax = sns.catplot(
y="job", x="duration", data=df, kind="bar", height=4.5, aspect=3, palette = 'bright'
)
ax.set_xticklabels(rotation=0).set(
title="JOB vs. DURATION");
# Bar chart of the mean contact duration per marital status.
ax = sns.catplot(
    y="marital", x="duration", data=df, kind="bar", height=4.5, aspect=3, palette='bright'
)
# The chart groups by marital status, so the title names MARITAL
# (it previously read "EDUCATION vs. DURATION").
ax.set_xticklabels(rotation=0).set(
    title="MARITAL vs. DURATION");
Single candidates, followed by divorced ones, had the longest contact durations during the marketing campaign.
ax = sns.catplot(
y="marital", x="campaign", data=df, kind="bar", height=4.5, aspect=3, palette = 'bright'
)
ax.set_xticklabels(rotation=0).set(
title="MARITAL vs. CAMPAIGN");
ax = sns.catplot(
y="job", x="campaign", data=df, kind="bar", height=4.5, aspect=3, palette = 'bright'
)
ax.set_xticklabels(rotation=0).set(
title="JOB vs. CAMPAIGN");
Defaults = df.copy()
print(Defaults["campaign"].value_counts())
print(Defaults["job"].value_counts())
1 17544 2 12505 3 5521 4 3522 5 1764 6 1291 7 735 8 540 9 327 10 266 11 201 12 155 13 133 14 93 15 84 16 79 17 69 18 51 19 44 20 43 21 35 22 23 25 22 23 22 24 20 29 16 28 16 26 13 31 12 27 10 32 9 30 8 33 6 34 5 36 4 35 4 43 3 38 3 37 2 50 2 41 2 46 1 58 1 55 1 63 1 51 1 39 1 44 1 Name: campaign, dtype: int64 blue-collar 9732 management 9458 technician 7597 admin. 5171 services 4154 retired 2264 self-employed 1579 entrepreneur 1487 unemployed 1303 housemaid 1240 student 938 unknown 288 Name: job, dtype: int64
df.dtypes
age int64 job object marital object education object default object balance int64 housing object loan object contact object day int64 month object duration int64 campaign int64 pdays int64 previous int64 poutcome object y object dtype: object
# NOTE: SimpleImputer is imported here but the imputation below uses fillna.
from sklearn.impute import SimpleImputer
# Identify columns with missing values
# NOTE: columns_with_missing is not read by the remaining lines of this cell.
columns_with_missing = df.columns[df.isnull().any()]
# Impute numerical columns with the per-column median (not the mean)
numerical_cols = df.select_dtypes(include='number').columns
df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].median())
# Impute categorical columns with mode (first mode row when there are ties)
categorical_cols = df.select_dtypes(include='object').columns
df[categorical_cols] = df[categorical_cols].fillna(df[categorical_cols].mode().iloc[0])
# Verify if there are still any missing values
remaining_missing = df.isnull().sum().sum()
print(f"Remaining missing values: {remaining_missing}")
Remaining missing values: 0
df.head()
| age | job | marital | education | default | balance | housing | loan | contact | day | month | duration | campaign | pdays | previous | poutcome | y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 58 | management | married | tertiary | no | 2143 | yes | no | unknown | 5 | may | 261 | 1 | -1 | 0 | unknown | no |
| 1 | 44 | technician | single | secondary | no | 29 | yes | no | unknown | 5 | may | 151 | 1 | -1 | 0 | unknown | no |
| 2 | 33 | entrepreneur | married | secondary | no | 2 | yes | yes | unknown | 5 | may | 76 | 1 | -1 | 0 | unknown | no |
| 3 | 47 | blue-collar | married | unknown | no | 1506 | yes | no | unknown | 5 | may | 92 | 1 | -1 | 0 | unknown | no |
| 4 | 33 | unknown | single | unknown | no | 1 | no | no | unknown | 5 | may | 198 | 1 | -1 | 0 | unknown | no |
df = df.rename(columns={'y': 'deposit'})
# Or rename the existing DataFrame (rather than creating a copy)
#df.rename(columns={'y': 'newName1', 'oldName2': 'newName2'}, inplace=True)
df.head()
| age | job | marital | education | default | balance | housing | loan | contact | day | month | duration | campaign | pdays | previous | poutcome | deposit | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 58 | management | married | tertiary | no | 2143 | yes | no | unknown | 5 | may | 261 | 1 | -1 | 0 | unknown | no |
| 1 | 44 | technician | single | secondary | no | 29 | yes | no | unknown | 5 | may | 151 | 1 | -1 | 0 | unknown | no |
| 2 | 33 | entrepreneur | married | secondary | no | 2 | yes | yes | unknown | 5 | may | 76 | 1 | -1 | 0 | unknown | no |
| 3 | 47 | blue-collar | married | unknown | no | 1506 | yes | no | unknown | 5 | may | 92 | 1 | -1 | 0 | unknown | no |
| 4 | 33 | unknown | single | unknown | no | 1 | no | no | unknown | 5 | may | 198 | 1 | -1 | 0 | unknown | no |
# replace the ‘deposit' column contains
# the values 'yes' and 'no' with
# 1 and 0:
df['deposit'] = df['deposit'].map(
{'yes':1 ,'no':0})
df.head()
| age | job | marital | education | default | balance | housing | loan | contact | day | month | duration | campaign | pdays | previous | poutcome | deposit | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 58 | management | married | tertiary | no | 2143 | yes | no | unknown | 5 | may | 261 | 1 | -1 | 0 | unknown | 0 |
| 1 | 44 | technician | single | secondary | no | 29 | yes | no | unknown | 5 | may | 151 | 1 | -1 | 0 | unknown | 0 |
| 2 | 33 | entrepreneur | married | secondary | no | 2 | yes | yes | unknown | 5 | may | 76 | 1 | -1 | 0 | unknown | 0 |
| 3 | 47 | blue-collar | married | unknown | no | 1506 | yes | no | unknown | 5 | may | 92 | 1 | -1 | 0 | unknown | 0 |
| 4 | 33 | unknown | single | unknown | no | 1 | no | no | unknown | 5 | may | 198 | 1 | -1 | 0 | unknown | 0 |
#Let's identify the categorical features
df.columns[df.dtypes == object]
Index(['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact',
'month', 'poutcome'],
dtype='object')
# apply get_dummies function
df_encoded = pd.get_dummies(df, columns=['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact',
'month', 'poutcome'])
df_encoded.head()
| age | balance | day | duration | campaign | pdays | previous | deposit | job_admin. | job_blue-collar | job_entrepreneur | job_housemaid | job_management | job_retired | job_self-employed | job_services | job_student | job_technician | job_unemployed | job_unknown | marital_divorced | marital_married | marital_single | education_primary | education_secondary | education_tertiary | education_unknown | default_no | default_yes | housing_no | housing_yes | loan_no | loan_yes | contact_cellular | contact_telephone | contact_unknown | month_apr | month_aug | month_dec | month_feb | month_jan | month_jul | month_jun | month_mar | month_may | month_nov | month_oct | month_sep | poutcome_failure | poutcome_other | poutcome_success | poutcome_unknown | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 58 | 2143 | 5 | 261 | 1 | -1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 1 | 44 | 29 | 5 | 151 | 1 | -1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 2 | 33 | 2 | 5 | 76 | 1 | -1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 3 | 47 | 1506 | 5 | 92 | 1 | -1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 4 | 33 | 1 | 5 | 198 | 1 | -1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
# Separate the target column from the predictors.
y = df_encoded["deposit"]
X = df_encoded.drop(columns="deposit")

# Splitting the data into train and test sets in 70:30 ratio
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=1,
    shuffle=True,
)
X_train.shape, X_test.shape
((31647, 51), (13564, 51))
print("Shape of Training set : ", X_train.shape)
print("Shape of test set : ", X_test.shape)
print("Percentage of classes in training set:")
print(y_train.value_counts(normalize=True))
print("Percentage of classes in test set:")
print(y_test.value_counts(normalize=True))
Shape of Training set : (31647, 51) Shape of test set : (13564, 51) Percentage of classes in training set: 0 0.88188 1 0.11812 Name: deposit, dtype: float64 Percentage of classes in test set: 0 0.88565 1 0.11435 Name: deposit, dtype: float64
model = DecisionTreeClassifier(random_state=1)
model.fit(X_train, y_train)
DecisionTreeClassifier(random_state=1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
DecisionTreeClassifier(random_state=1)
# defining a function to compute different metrics to check performance of a classification model built using sklearn
def model_performance_classification_sklearn(model, predictors, target):
    """
    Build a one-row DataFrame of classification metrics for a fitted model.

    model: classifier exposing ``predict``
    predictors: independent variables passed to ``model.predict``
    target: dependent variable (true labels)

    Returns a DataFrame with index [0] and the columns
    Accuracy, Recall, Precision and F1.
    """
    predicted = model.predict(predictors)

    # Metric label -> scoring function, in output column order.
    scorers = {
        "Accuracy": accuracy_score,
        "Recall": recall_score,
        "Precision": precision_score,
        "F1": f1_score,
    }
    scores = {label: scorer(target, predicted) for label, scorer in scorers.items()}

    return pd.DataFrame(scores, index=[0])
def confusion_matrix_sklearn(model, predictors, target):
    """
    To plot the confusion_matrix with percentages

    model: classifier exposing ``predict``
    predictors: independent variables passed to ``model.predict``
    target: dependent variable (true labels)

    Each cell is annotated with its count and its share of all samples.
    Nothing is returned; the heatmap is drawn on a new matplotlib figure.
    """
    y_pred = model.predict(predictors)
    cm = confusion_matrix(target, y_pred)

    # Total sample count, computed once instead of once per cell.
    total = cm.sum()
    # Reshape to the matrix's own shape rather than a fixed (2, 2), so the
    # annotation also works when the number of classes is not two.
    labels = np.asarray(
        [
            "{0:0.0f}".format(item) + "\n{0:.2%}".format(item / total)
            for item in cm.flatten()
        ]
    ).reshape(cm.shape)

    plt.figure(figsize=(6, 4))
    sns.heatmap(cm, annot=labels, fmt="")
    plt.ylabel("True label")
    plt.xlabel("Predicted label")
confusion_matrix_sklearn(model, X_train, y_train)
decision_tree_perf_train = model_performance_classification_sklearn(
model, X_train, y_train
)
decision_tree_perf_train
| Accuracy | Recall | Precision | F1 | |
|---|---|---|---|---|
| 0 | 1.00000 | 1.00000 | 1.00000 | 1.00000 |
The model scores 1.0 on every metric on the training dataset, which indicates that the unpruned tree is overfitting.
confusion_matrix_sklearn(model, X_test, y_test)
decision_tree_perf_test = model_performance_classification_sklearn(
model, X_test, y_test
)
decision_tree_perf_test
| Accuracy | Recall | Precision | F1 | |
|---|---|---|---|---|
| 0 | 0.87651 | 0.49323 | 0.46252 | 0.47738 |
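Accuracy stays high (about 0.88) when the model is evaluated on the test dataset, but recall (0.49) and precision (0.46) are far below the training scores, which indicates that the unpruned tree overfits.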
Before pruning the tree let's check the important features
Plotting the feature importance of each variable
# Rank the features by the importance reported by `model` and draw them as
# horizontal bars, least important at the bottom.
feature_names = X_train.columns.tolist()
importances = model.feature_importances_
indices = np.argsort(importances)

bar_positions = range(len(indices))
ranked_names = [feature_names[i] for i in indices]

plt.figure(figsize=(8, 10))
plt.title("Feature Importances")
plt.barh(bar_positions, importances[indices], color="blue", align="center")
plt.yticks(bar_positions, ranked_names)
plt.xlabel("Relative Importance")
plt.show()
Duration and average yearly balance are the most important features in this case of decision tree using sklearn, without pruning it
# Choose the type of classifier.
estimator = DecisionTreeClassifier(random_state=1, class_weight="balanced")

# Grid of parameters to choose from
parameters = {
    "max_depth": np.arange(2, 7, 2),
    "max_leaf_nodes": [50, 75, 150, 250],
    "min_samples_split": [10, 30, 50, 70],
}

# Scorer used to compare parameter combinations.
# NOTE: despite its name, acc_scorer measures F1, not accuracy.
acc_scorer = make_scorer(f1_score)

# Run the 5-fold grid search
grid_obj = GridSearchCV(estimator, parameters, scoring=acc_scorer, cv=5)
grid_obj = grid_obj.fit(X_train, y_train)

# Set the clf to the best combination of parameters.
# With GridSearchCV's default refit=True, best_estimator_ has already been
# refit on the whole of X_train, so no second fit call is needed.
estimator = grid_obj.best_estimator_
estimator
DecisionTreeClassifier(class_weight='balanced', max_depth=6, max_leaf_nodes=50,
min_samples_split=50, random_state=1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. DecisionTreeClassifier(class_weight='balanced', max_depth=6, max_leaf_nodes=50,
min_samples_split=50, random_state=1)
decision_tree_tune_perf_train = model_performance_classification_sklearn(
estimator, X_train, y_train
)
decision_tree_tune_perf_train
| Accuracy | Recall | Precision | F1 | |
|---|---|---|---|---|
| 0 | 0.82513 | 0.82772 | 0.38753 | 0.52790 |
confusion_matrix_sklearn(estimator, X_train, y_train)
decision_tree_tune_perf_train = model_performance_classification_sklearn(
estimator, X_train, y_train
)
decision_tree_tune_perf_train
| Accuracy | Recall | Precision | F1 | |
|---|---|---|---|---|
| 0 | 0.82513 | 0.82772 | 0.38753 | 0.52790 |
Accuracy is still good when the model is evaluated on the training dataset.
confusion_matrix_sklearn(estimator, X_test, y_test)
decision_tree_tune_perf_test = model_performance_classification_sklearn(
estimator, X_test, y_test
)
decision_tree_tune_perf_test
| Accuracy | Recall | Precision | F1 | |
|---|---|---|---|---|
| 0 | 0.82159 | 0.81560 | 0.37217 | 0.51111 |
Accuracy remains almost the same when the model sees the test dataset
# Draw the tuned tree on a wide figure.
plt.figure(figsize=(20, 10))
out = tree.plot_tree(
    estimator,
    feature_names=feature_names,
    filled=True,
    fontsize=9,
    node_ids=False,
    class_names=None,
)

# Give every returned item that has an arrow a black, 1-wide edge so the
# split arrows are visible.
for arrow in (o.arrow_patch for o in out):
    if arrow is None:
        continue
    arrow.set_edgecolor("black")
    arrow.set_linewidth(1)

plt.show()
# Text report showing the rules of a decision tree -
print(tree.export_text(estimator, feature_names=feature_names, show_weights=True))
|--- duration <= 205.50 | |--- poutcome_success <= 0.50 | | |--- month_mar <= 0.50 | | | |--- month_oct <= 0.50 | | | | |--- duration <= 124.50 | | | | | |--- month_apr <= 0.50 | | | | | | |--- weights: [5320.42, 309.02] class: 0 | | | | | |--- month_apr > 0.50 | | | | | | |--- weights: [271.58, 105.83] class: 0 | | | | |--- duration > 124.50 | | | | | |--- housing_no <= 0.50 | | | | | | |--- weights: [2297.92, 245.52] class: 0 | | | | | |--- housing_no > 0.50 | | | | | | |--- weights: [1556.33, 863.56] class: 0 | | | |--- month_oct > 0.50 | | | | |--- duration <= 95.50 | | | | | |--- marital_divorced <= 0.50 | | | | | | |--- weights: [40.25, 0.00] class: 0 | | | | | |--- marital_divorced > 0.50 | | | | | | |--- weights: [5.10, 8.47] class: 1 | | | | |--- duration > 95.50 | | | | | |--- day <= 20.50 | | | | | | |--- weights: [39.12, 59.26] class: 1 | | | | | |--- day > 20.50 | | | | | | |--- weights: [15.31, 165.09] class: 1 | | |--- month_mar > 0.50 | | | |--- duration <= 79.00 | | | | |--- weights: [16.44, 4.23] class: 0 | | | |--- duration > 79.00 | | | | |--- campaign <= 7.50 | | | | | |--- previous <= 4.50 | | | | | | |--- weights: [38.55, 215.89] class: 1 | | | | | |--- previous > 4.50 | | | | | | |--- weights: [3.40, 0.00] class: 0 | | | | |--- campaign > 7.50 | | | | | |--- weights: [3.40, 0.00] class: 0 | |--- poutcome_success > 0.50 | | |--- duration <= 132.50 | | | |--- campaign <= 3.50 | | | | |--- pdays <= 98.50 | | | | | |--- duration <= 80.00 | | | | | | |--- weights: [3.97, 0.00] class: 0 | | | | | |--- duration > 80.00 | | | | | | |--- weights: [13.61, 80.43] class: 1 | | | | |--- pdays > 98.50 | | | | | |--- month_aug <= 0.50 | | | | | | |--- weights: [30.62, 50.80] class: 1 | | | | | |--- month_aug > 0.50 | | | | | | |--- weights: [9.64, 0.00] class: 0 | | | |--- campaign > 3.50 | | | | |--- weights: [13.61, 0.00] class: 0 | | |--- duration > 132.50 | | | |--- job_entrepreneur <= 0.50 | | | | |--- pdays <= 202.50 | | | | | |--- balance <= -56.50 
| | | | | | |--- weights: [0.57, 0.00] class: 0 | | | | | |--- balance > -56.50 | | | | | | |--- weights: [34.59, 524.91] class: 1 | | | | |--- pdays > 202.50 | | | | | |--- weights: [9.64, 42.33] class: 1 | | | |--- job_entrepreneur > 0.50 | | | | |--- weights: [1.13, 0.00] class: 0 |--- duration > 205.50 | |--- duration <= 473.50 | | |--- contact_unknown <= 0.50 | | | |--- poutcome_success <= 0.50 | | | | |--- housing_no <= 0.50 | | | | | |--- pdays <= 374.50 | | | | | | |--- weights: [1636.84, 922.83] class: 0 | | | | | |--- pdays > 374.50 | | | | | | |--- weights: [4.54, 101.60] class: 1 | | | | |--- housing_no > 0.50 | | | | | |--- loan_yes <= 0.50 | | | | | | |--- weights: [1200.84, 2933.57] class: 1 | | | | | |--- loan_yes > 0.50 | | | | | | |--- weights: [252.30, 126.99] class: 0 | | | |--- poutcome_success > 0.50 | | | | |--- pdays <= 29.50 | | | | | |--- weights: [3.40, 4.23] class: 1 | | | | |--- pdays > 29.50 | | | | | |--- housing_yes <= 0.50 | | | | | | |--- weights: [39.12, 1164.12] class: 1 | | | | | |--- housing_yes > 0.50 | | | | | | |--- weights: [29.48, 355.58] class: 1 | | |--- contact_unknown > 0.50 | | | |--- month_oct <= 0.50 | | | | |--- duration <= 368.50 | | | | | |--- month_nov <= 0.50 | | | | | | |--- weights: [1207.07, 29.63] class: 0 | | | | | |--- month_nov > 0.50 | | | | | | |--- weights: [3.97, 12.70] class: 1 | | | | |--- duration > 368.50 | | | | | |--- month_may <= 0.50 | | | | | | |--- weights: [115.66, 63.50] class: 0 | | | | | |--- month_may > 0.50 | | | | | | |--- weights: [222.82, 29.63] class: 0 | | | |--- month_oct > 0.50 | | | | |--- weights: [0.00, 25.40] class: 1 | |--- duration > 473.50 | | |--- duration <= 647.50 | | | |--- contact_unknown <= 0.50 | | | | |--- poutcome_success <= 0.50 | | | | | |--- month_jul <= 0.50 | | | | | | |--- weights: [374.77, 1515.47] class: 1 | | | | | |--- month_jul > 0.50 | | | | | | |--- weights: [138.91, 300.55] class: 1 | | | | |--- poutcome_success > 0.50 | | | | | |--- balance <= 
13362.50 | | | | | | |--- weights: [11.34, 304.79] class: 1 | | | | | |--- balance > 13362.50 | | | | | | |--- weights: [1.13, 0.00] class: 0 | | | |--- contact_unknown > 0.50 | | | | |--- month_may <= 0.50 | | | | | |--- balance <= 2941.00 | | | | | | |--- weights: [60.10, 152.39] class: 1 | | | | | |--- balance > 2941.00 | | | | | | |--- weights: [16.44, 8.47] class: 0 | | | | |--- month_may > 0.50 | | | | | |--- duration <= 505.50 | | | | | | |--- weights: [48.19, 16.93] class: 0 | | | | | |--- duration > 505.50 | | | | | | |--- weights: [111.13, 114.29] class: 1 | | |--- duration > 647.50 | | | |--- contact_unknown <= 0.50 | | | | |--- duration <= 847.50 | | | | | |--- poutcome_success <= 0.50 | | | | | | |--- weights: [213.75, 1439.27] class: 1 | | | | | |--- poutcome_success > 0.50 | | | | | | |--- weights: [4.54, 173.56] class: 1 | | | | |--- duration > 847.50 | | | | | |--- duration <= 3193.00 | | | | | | |--- weights: [179.73, 2290.13] class: 1 | | | | | |--- duration > 3193.00 | | | | | | |--- weights: [1.13, 0.00] class: 0 | | | |--- contact_unknown > 0.50 | | | | |--- duration <= 827.50 | | | | | |--- duration <= 815.50 | | | | | | |--- weights: [119.06, 330.19] class: 1 | | | | | |--- duration > 815.50 | | | | | | |--- weights: [7.94, 0.00] class: 0 | | | | |--- duration > 827.50 | | | | | |--- campaign <= 11.50 | | | | | | |--- weights: [91.28, 732.33] class: 1 | | | | | |--- campaign > 11.50 | | | | | | |--- weights: [2.83, 0.00] class: 0
Plotting the feature importance of each variable
# Horizontal bar chart of the fitted estimator's feature importances.
importances = estimator.feature_importances_
# Ascending sort: the most important feature is drawn last, i.e. as the top bar.
indices = np.argsort(importances)
bar_positions = range(len(indices))
sorted_feature_names = [feature_names[i] for i in indices]

plt.figure(figsize=(8, 8))
plt.barh(bar_positions, importances[indices], color="blue", align="center")
plt.yticks(bar_positions, sorted_feature_names)
plt.title("Feature Importances")
plt.xlabel("Relative Importance")
plt.show()
After pre-pruning, duration remains the most important feature; however, in this case, poutcome is the next most important feature.
# Compute the cost-complexity pruning path of a class-weighted tree on the training data.
clf = DecisionTreeClassifier(class_weight="balanced", random_state=1)
path = clf.cost_complexity_pruning_path(X_train, y_train)

# NOTE(review): the absolute value presumably guards against tiny negative alphas
# caused by floating-point error -- confirm that this is the intent.
ccp_alphas = np.abs(path.ccp_alphas)
impurities = path.impurities

# Show the raw path (alphas and impurities as returned, before abs) as a table.
pd.DataFrame(path)
| ccp_alphas | impurities | |
|---|---|---|
| 0 | 0.00000 | -0.00000 |
| 1 | 0.00000 | -0.00000 |
| 2 | 0.00000 | -0.00000 |
| 3 | 0.00000 | -0.00000 |
| 4 | 0.00000 | -0.00000 |
| ... | ... | ... |
| 1583 | 0.00914 | 0.30836 |
| 1584 | 0.01229 | 0.32065 |
| 1585 | 0.02263 | 0.34329 |
| 1586 | 0.02630 | 0.39588 |
| 1587 | 0.10412 | 0.50000 |
1588 rows × 2 columns
# Total leaf impurity along the pruning path; the final (alpha, impurity) pair is left out.
fig, ax = plt.subplots(figsize=(10, 5))
alphas_to_plot = ccp_alphas[:-1]
impurities_to_plot = impurities[:-1]
ax.plot(alphas_to_plot, impurities_to_plot, marker="o", drawstyle="steps-post")
ax.set(
    xlabel="effective alpha",
    ylabel="total impurity of leaves",
    title="Total Impurity vs effective alpha for training set",
)
plt.show()
Next, we train one decision tree for each effective alpha. The last value in ccp_alphas is the alpha value that prunes the whole tree, leaving the last tree, clfs[-1], with a single node.
# Fit one class-weighted decision tree per candidate ccp_alpha, in the order of ccp_alphas.
clfs = []
for ccp_alpha in ccp_alphas:
    # fit() returns the estimator itself, so construction and fitting can be chained.
    clf = DecisionTreeClassifier(
        ccp_alpha=ccp_alpha, class_weight="balanced", random_state=1
    ).fit(X_train, y_train)
    clfs.append(clf)

# Report the size of the tree fitted with the last alpha in the list.
last_tree = clfs[-1]
summary = "Number of nodes in the last tree is: {} with ccp_alpha: {}".format(
    last_tree.tree_.node_count, ccp_alphas[-1]
)
print(summary)
Number of nodes in the last tree is: 1 with ccp_alpha: 0.10412091138731988
# Drop the last tree and its alpha before plotting.
# NOTE: this trims in place, so re-running the cell removes one more entry each time.
clfs, ccp_alphas = clfs[:-1], ccp_alphas[:-1]

# Structural size of every remaining tree.
node_counts = [fitted.tree_.node_count for fitted in clfs]
depth = [fitted.tree_.max_depth for fitted in clfs]

# Two stacked panels that share the same styling: node count and depth against alpha.
fig, ax = plt.subplots(2, 1, figsize=(10, 7))
panels = [
    (node_counts, "number of nodes", "Number of nodes vs alpha"),
    (depth, "depth of tree", "Depth vs alpha"),
]
for axis, (series, y_label, panel_title) in zip(ax, panels):
    axis.plot(ccp_alphas, series, marker="o", drawstyle="steps-post")
    axis.set_xlabel("alpha")
    axis.set_ylabel(y_label)
    axis.set_title(panel_title)
fig.tight_layout()
# F1 score of every candidate tree on the training set and on the test set.
f1_train = [f1_score(y_train, model.predict(X_train)) for model in clfs]
f1_test = [f1_score(y_test, model.predict(X_test)) for model in clfs]

# Overlay both curves against alpha.
fig, ax = plt.subplots(figsize=(15, 5))
ax.set(
    xlabel="alpha",
    ylabel="F1 Score",
    title="F1 Score vs alpha for training and testing sets",
)
for curve_label, scores in (("train", f1_train), ("test", f1_test)):
    ax.plot(ccp_alphas, scores, marker="o", label=curve_label, drawstyle="steps-post")
ax.legend()
plt.show()
# Pick the tree with the highest test-set F1 (first one wins on ties).
# NOTE(review): the model is selected on the test set, so the test metrics reported for
# best_model are optimistically biased; consider selecting via cross-validation on the
# training data instead.
index_best_model = np.asarray(f1_test).argmax()
best_model = clfs[index_best_model]
print(best_model)
DecisionTreeClassifier(ccp_alpha=7.258901846204399e-05, class_weight='balanced',
random_state=1)
# Training-set evaluation of the post-pruned tree, using helpers defined earlier in the notebook.
confusion_matrix_sklearn(best_model, X_train, y_train)

train_inputs = (best_model, X_train, y_train)
decision_tree_post_perf_train = model_performance_classification_sklearn(*train_inputs)
# Trailing expression so the notebook displays the metrics.
decision_tree_post_perf_train
| Accuracy | Recall | Precision | F1 | |
|---|---|---|---|---|
| 0 | 0.93127 | 0.99866 | 0.63239 | 0.77440 |
Accuracy has slightly improved in the post-pruned case.
# Test-set evaluation of the post-pruned tree, using helpers defined earlier in the notebook.
confusion_matrix_sklearn(best_model, X_test, y_test)

test_inputs = (best_model, X_test, y_test)
decision_tree_post_test = model_performance_classification_sklearn(*test_inputs)
# Trailing expression so the notebook displays the metrics.
decision_tree_post_test
| Accuracy | Recall | Precision | F1 | |
|---|---|---|---|---|
| 0 | 0.86597 | 0.73050 | 0.44730 | 0.55485 |
Observations
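After post-pruning, the decision tree generalizes only partially: accuracy is reasonable on both the training and test sets (0.93 vs. 0.87), but recall, precision, and F1 drop considerably on the test set (e.g., F1 of 0.77 vs. 0.55), so these metrics still fluctuate strongly between the two sets.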
# Draw the post-pruned tree on a wide canvas.
plt.figure(figsize=(20, 10))
node_annotations = tree.plot_tree(
    best_model,
    feature_names=feature_names,
    filled=True,
    fontsize=9,
    node_ids=False,
    class_names=None,
)

# Give every connecting arrow a black, 1pt edge; artists without an arrow are skipped.
for annotation in node_annotations:
    arrow = annotation.arrow_patch
    if arrow is None:
        continue
    arrow.set(edgecolor="black", linewidth=1)
plt.show()
# Print the decision rules of the post-pruned tree as text, with the
# class weights shown at each leaf.
tree_rules = tree.export_text(
    best_model, feature_names=feature_names, show_weights=True
)
print(tree_rules)
|--- duration <= 205.50 | |--- poutcome_success <= 0.50 | | |--- month_mar <= 0.50 | | | |--- month_oct <= 0.50 | | | | |--- duration <= 124.50 | | | | | |--- month_apr <= 0.50 | | | | | | |--- month_feb <= 0.50 | | | | | | | |--- month_sep <= 0.50 | | | | | | | | |--- age <= 24.50 | | | | | | | | | |--- duration <= 99.50 | | | | | | | | | | |--- month_nov <= 0.50 | | | | | | | | | | | |--- weights: [45.92, 0.00] class: 0 | | | | | | | | | | |--- month_nov > 0.50 | | | | | | | | | | | |--- weights: [1.13, 4.23] class: 1 | | | | | | | | | |--- duration > 99.50 | | | | | | | | | | |--- balance <= 242.50 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | | | | |--- balance > 242.50 | | | | | | | | | | | |--- weights: [10.21, 0.00] class: 0 | | | | | | | | |--- age > 24.50 | | | | | | | | | |--- day <= 4.50 | | | | | | | | | | |--- contact_cellular <= 0.50 | | | | | | | | | | | |--- weights: [159.88, 0.00] class: 0 | | | | | | | | | | |--- contact_cellular > 0.50 | | | | | | | | | | | |--- truncated branch of depth 8 | | | | | | | | | |--- day > 4.50 | | | | | | | | | | |--- pdays <= 392.00 | | | | | | | | | | | |--- truncated branch of depth 15 | | | | | | | | | | |--- pdays > 392.00 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | |--- month_sep > 0.50 | | | | | | | | |--- duration <= 90.50 | | | | | | | | | |--- weights: [19.28, 0.00] class: 0 | | | | | | | | |--- duration > 90.50 | | | | | | | | | |--- day <= 10.50 | | | | | | | | | | |--- job_services <= 0.50 | | | | | | | | | | | |--- weights: [14.74, 0.00] class: 0 | | | | | | | | | | |--- job_services > 0.50 | | | | | | | | | | | |--- weights: [0.57, 4.23] class: 1 | | | | | | | | | |--- day > 10.50 | | | | | | | | | | |--- day <= 12.50 | | | | | | | | | | | |--- weights: [1.13, 16.93] class: 1 | | | | | | | | | | |--- day > 12.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | |--- month_feb > 0.50 | | | | | | | |--- day <= 9.50 | | | | | | | | |--- 
education_tertiary <= 0.50 | | | | | | | | | |--- weights: [211.48, 0.00] class: 0 | | | | | | | | |--- education_tertiary > 0.50 | | | | | | | | | |--- balance <= 388.50 | | | | | | | | | | |--- weights: [45.92, 0.00] class: 0 | | | | | | | | | |--- balance > 388.50 | | | | | | | | | | |--- age <= 29.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | | |--- age > 29.50 | | | | | | | | | | | |--- truncated branch of depth 5 | | | | | | | |--- day > 9.50 | | | | | | | | |--- duration <= 74.50 | | | | | | | | | |--- pdays <= 296.50 | | | | | | | | | | |--- weights: [15.88, 0.00] class: 0 | | | | | | | | | |--- pdays > 296.50 | | | | | | | | | | |--- weights: [0.00, 4.23] class: 1 | | | | | | | | |--- duration > 74.50 | | | | | | | | | |--- contact_cellular <= 0.50 | | | | | | | | | | |--- weights: [2.27, 0.00] class: 0 | | | | | | | | | |--- contact_cellular > 0.50 | | | | | | | | | | |--- duration <= 121.50 | | | | | | | | | | | |--- truncated branch of depth 5 | | | | | | | | | | |--- duration > 121.50 | | | | | | | | | | | |--- weights: [1.70, 0.00] class: 0 | | | | | |--- month_apr > 0.50 | | | | | | |--- housing_yes <= 0.50 | | | | | | | |--- day <= 27.50 | | | | | | | | |--- duration <= 96.50 | | | | | | | | | |--- balance <= 294.50 | | | | | | | | | | |--- day <= 14.50 | | | | | | | | | | | |--- weights: [3.97, 0.00] class: 0 | | | | | | | | | | |--- day > 14.50 | | | | | | | | | | | |--- weights: [2.83, 12.70] class: 1 | | | | | | | | | |--- balance > 294.50 | | | | | | | | | | |--- weights: [21.54, 0.00] class: 0 | | | | | | | | |--- duration > 96.50 | | | | | | | | | |--- age <= 35.00 | | | | | | | | | | |--- age <= 20.50 | | | | | | | | | | | |--- weights: [1.70, 0.00] class: 0 | | | | | | | | | | |--- age > 20.50 | | | | | | | | | | | |--- weights: [4.54, 55.03] class: 1 | | | | | | | | | |--- age > 35.00 | | | | | | | | | | |--- contact_unknown <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | | 
|--- contact_unknown > 0.50 | | | | | | | | | | | |--- weights: [0.00, 4.23] class: 1 | | | | | | | |--- day > 27.50 | | | | | | | | |--- weights: [20.98, 0.00] class: 0 | | | | | | |--- housing_yes > 0.50 | | | | | | | |--- day <= 23.50 | | | | | | | | |--- duration <= 104.50 | | | | | | | | | |--- job_self-employed <= 0.50 | | | | | | | | | | |--- weights: [152.51, 0.00] class: 0 | | | | | | | | | |--- job_self-employed > 0.50 | | | | | | | | | | |--- balance <= 129.00 | | | | | | | | | | | |--- weights: [0.57, 4.23] class: 1 | | | | | | | | | | |--- balance > 129.00 | | | | | | | | | | | |--- weights: [5.10, 0.00] class: 0 | | | | | | | | |--- duration > 104.50 | | | | | | | | | |--- age <= 32.50 | | | | | | | | | | |--- balance <= 770.50 | | | | | | | | | | | |--- weights: [3.97, 0.00] class: 0 | | | | | | | | | | |--- balance > 770.50 | | | | | | | | | | | |--- weights: [0.00, 12.70] class: 1 | | | | | | | | | |--- age > 32.50 | | | | | | | | | | |--- weights: [27.21, 0.00] class: 0 | | | | | | | |--- day > 23.50 | | | | | | | | |--- day <= 27.50 | | | | | | | | | |--- balance <= 169.50 | | | | | | | | | | |--- weights: [2.83, 0.00] class: 0 | | | | | | | | | |--- balance > 169.50 | | | | | | | | | | |--- weights: [1.13, 12.70] class: 1 | | | | | | | | |--- day > 27.50 | | | | | | | | | |--- weights: [8.50, 0.00] class: 0 | | | | |--- duration > 124.50 | | | | | |--- housing_no <= 0.50 | | | | | | |--- month_sep <= 0.50 | | | | | | | |--- month_may <= 0.50 | | | | | | | | |--- pdays <= 18.50 | | | | | | | | | |--- age <= 60.50 | | | | | | | | | | |--- month_feb <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 9 | | | | | | | | | | |--- month_feb > 0.50 | | | | | | | | | | | |--- truncated branch of depth 4 | | | | | | | | | |--- age > 60.50 | | | | | | | | | | |--- weights: [0.57, 8.47] class: 1 | | | | | | | | |--- pdays > 18.50 | | | | | | | | | |--- pdays <= 93.50 | | | | | | | | | | |--- balance <= 25.50 | | | | | | | | | | | |--- weights: 
[1.70, 0.00] class: 0 | | | | | | | | | | |--- balance > 25.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | |--- pdays > 93.50 | | | | | | | | | | |--- month_aug <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 6 | | | | | | | | | | |--- month_aug > 0.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | |--- month_may > 0.50 | | | | | | | | |--- pdays <= 381.50 | | | | | | | | | |--- balance <= 7102.50 | | | | | | | | | | |--- balance <= 2.50 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | | | | |--- balance > 2.50 | | | | | | | | | | | |--- weights: [930.39, 0.00] class: 0 | | | | | | | | | |--- balance > 7102.50 | | | | | | | | | | |--- age <= 32.50 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | | | | |--- age > 32.50 | | | | | | | | | | | |--- weights: [21.54, 0.00] class: 0 | | | | | | | | |--- pdays > 381.50 | | | | | | | | | |--- weights: [0.00, 4.23] class: 1 | | | | | | |--- month_sep > 0.50 | | | | | | | |--- day <= 5.00 | | | | | | | | |--- weights: [3.97, 0.00] class: 0 | | | | | | | |--- day > 5.00 | | | | | | | | |--- pdays <= 185.00 | | | | | | | | | |--- weights: [1.13, 25.40] class: 1 | | | | | | | | |--- pdays > 185.00 | | | | | | | | | |--- weights: [1.70, 0.00] class: 0 | | | | | |--- housing_no > 0.50 | | | | | | |--- month_apr <= 0.50 | | | | | | | |--- pdays <= 41.50 | | | | | | | | |--- age <= 61.50 | | | | | | | | | |--- age <= 28.50 | | | | | | | | | | |--- balance <= 10.00 | | | | | | | | | | | |--- weights: [17.58, 0.00] class: 0 | | | | | | | | | | |--- balance > 10.00 | | | | | | | | | | | |--- truncated branch of depth 9 | | | | | | | | | |--- age > 28.50 | | | | | | | | | | |--- day <= 3.50 | | | | | | | | | | | |--- truncated branch of depth 6 | | | | | | | | | | |--- day > 3.50 | | | | | | | | | | | |--- truncated branch of depth 15 | | | | | | | | |--- age > 61.50 | | | | | | | | | |--- day <= 11.50 | | | | | | | | | | |--- 
duration <= 147.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | | |--- duration > 147.50 | | | | | | | | | | | |--- weights: [9.64, 0.00] class: 0 | | | | | | | | | |--- day > 11.50 | | | | | | | | | | |--- duration <= 199.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | | |--- duration > 199.50 | | | | | | | | | | | |--- weights: [2.27, 0.00] class: 0 | | | | | | | |--- pdays > 41.50 | | | | | | | | |--- pdays <= 126.50 | | | | | | | | | |--- day <= 16.50 | | | | | | | | | | |--- job_student <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | | | | |--- job_student > 0.50 | | | | | | | | | | | |--- weights: [3.40, 0.00] class: 0 | | | | | | | | | |--- day > 16.50 | | | | | | | | | | |--- balance <= 1770.50 | | | | | | | | | | | |--- truncated branch of depth 4 | | | | | | | | | | |--- balance > 1770.50 | | | | | | | | | | | |--- weights: [11.91, 0.00] class: 0 | | | | | | | | |--- pdays > 126.50 | | | | | | | | | |--- job_admin. <= 0.50 | | | | | | | | | | |--- month_aug <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 7 | | | | | | | | | | |--- month_aug > 0.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | |--- job_admin. 
> 0.50 | | | | | | | | | | |--- weights: [3.40, 21.17] class: 1 | | | | | | |--- month_apr > 0.50 | | | | | | | |--- day <= 28.50 | | | | | | | | |--- campaign <= 5.50 | | | | | | | | | |--- contact_telephone <= 0.50 | | | | | | | | | | |--- job_self-employed <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 5 | | | | | | | | | | |--- job_self-employed > 0.50 | | | | | | | | | | | |--- weights: [1.70, 0.00] class: 0 | | | | | | | | | |--- contact_telephone > 0.50 | | | | | | | | | | |--- balance <= 2808.00 | | | | | | | | | | | |--- weights: [6.24, 0.00] class: 0 | | | | | | | | | | |--- balance > 2808.00 | | | | | | | | | | | |--- weights: [1.13, 8.47] class: 1 | | | | | | | | |--- campaign > 5.50 | | | | | | | | | |--- weights: [3.40, 0.00] class: 0 | | | | | | | |--- day > 28.50 | | | | | | | | |--- duration <= 174.00 | | | | | | | | | |--- weights: [10.21, 0.00] class: 0 | | | | | | | | |--- duration > 174.00 | | | | | | | | | |--- campaign <= 1.50 | | | | | | | | | | |--- weights: [2.27, 8.47] class: 1 | | | | | | | | | |--- campaign > 1.50 | | | | | | | | | | |--- weights: [2.27, 0.00] class: 0 | | | |--- month_oct > 0.50 | | | | |--- duration <= 95.50 | | | | | |--- marital_divorced <= 0.50 | | | | | | |--- weights: [40.25, 0.00] class: 0 | | | | | |--- marital_divorced > 0.50 | | | | | | |--- education_tertiary <= 0.50 | | | | | | | |--- weights: [4.54, 0.00] class: 0 | | | | | | |--- education_tertiary > 0.50 | | | | | | | |--- weights: [0.57, 8.47] class: 1 | | | | |--- duration > 95.50 | | | | | |--- day <= 20.50 | | | | | | |--- education_primary <= 0.50 | | | | | | | |--- campaign <= 2.50 | | | | | | | | |--- previous <= 5.50 | | | | | | | | | |--- duration <= 130.50 | | | | | | | | | | |--- age <= 46.00 | | | | | | | | | | | |--- weights: [8.50, 0.00] class: 0 | | | | | | | | | | |--- age > 46.00 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | |--- duration > 130.50 | | | | | | | | | | |--- weights: [10.21, 46.56] 
class: 1 | | | | | | | | |--- previous > 5.50 | | | | | | | | | |--- weights: [3.97, 0.00] class: 0 | | | | | | | |--- campaign > 2.50 | | | | | | | | |--- weights: [5.10, 0.00] class: 0 | | | | | | |--- education_primary > 0.50 | | | | | | | |--- weights: [7.37, 0.00] class: 0 | | | | | |--- day > 20.50 | | | | | | |--- campaign <= 3.50 | | | | | | | |--- balance <= 10745.00 | | | | | | | | |--- campaign <= 1.50 | | | | | | | | | |--- weights: [5.67, 139.69] class: 1 | | | | | | | | |--- campaign > 1.50 | | | | | | | | | |--- age <= 65.50 | | | | | | | | | | |--- age <= 41.00 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | | |--- age > 41.00 | | | | | | | | | | | |--- weights: [3.40, 0.00] class: 0 | | | | | | | | | |--- age > 65.50 | | | | | | | | | | |--- weights: [0.57, 16.93] class: 1 | | | | | | | |--- balance > 10745.00 | | | | | | | | |--- weights: [1.70, 0.00] class: 0 | | | | | | |--- campaign > 3.50 | | | | | | | |--- weights: [2.27, 0.00] class: 0 | | |--- month_mar > 0.50 | | | |--- duration <= 79.00 | | | | |--- job_management <= 0.50 | | | | | |--- weights: [13.04, 0.00] class: 0 | | | | |--- job_management > 0.50 | | | | | |--- marital_married <= 0.50 | | | | | | |--- weights: [0.00, 4.23] class: 1 | | | | | |--- marital_married > 0.50 | | | | | | |--- weights: [3.40, 0.00] class: 0 | | | |--- duration > 79.00 | | | | |--- previous <= 4.50 | | | | | |--- job_blue-collar <= 0.50 | | | | | | |--- campaign <= 7.50 | | | | | | | |--- education_unknown <= 0.50 | | | | | | | | |--- duration <= 173.50 | | | | | | | | | |--- duration <= 164.50 | | | | | | | | | | |--- age <= 31.50 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | | | | |--- age > 31.50 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | | | |--- duration > 164.50 | | | | | | | | | | |--- weights: [3.40, 0.00] class: 0 | | | | | | | | |--- duration > 173.50 | | | | | | | | | |--- weights: [2.27, 63.50] class: 1 | | | | | | | |--- 
education_unknown > 0.50 | | | | | | | | |--- day <= 9.00 | | | | | | | | | |--- weights: [0.57, 4.23] class: 1 | | | | | | | | |--- day > 9.00 | | | | | | | | | |--- weights: [3.97, 0.00] class: 0 | | | | | | |--- campaign > 7.50 | | | | | | | |--- weights: [2.83, 0.00] class: 0 | | | | | |--- job_blue-collar > 0.50 | | | | | | |--- weights: [3.40, 0.00] class: 0 | | | | |--- previous > 4.50 | | | | | |--- weights: [3.40, 0.00] class: 0 | |--- poutcome_success > 0.50 | | |--- duration <= 132.50 | | | |--- campaign <= 3.50 | | | | |--- pdays <= 98.50 | | | | | |--- duration <= 80.00 | | | | | | |--- weights: [3.97, 0.00] class: 0 | | | | | |--- duration > 80.00 | | | | | | |--- day <= 27.50 | | | | | | | |--- month_oct <= 0.50 | | | | | | | | |--- pdays <= 84.50 | | | | | | | | | |--- pdays <= 67.50 | | | | | | | | | | |--- weights: [0.57, 12.70] class: 1 | | | | | | | | | |--- pdays > 67.50 | | | | | | | | | | |--- weights: [3.97, 0.00] class: 0 | | | | | | | | |--- pdays > 84.50 | | | | | | | | | |--- weights: [6.24, 67.73] class: 1 | | | | | | | |--- month_oct > 0.50 | | | | | | | | |--- weights: [1.13, 0.00] class: 0 | | | | | | |--- day > 27.50 | | | | | | | |--- weights: [1.70, 0.00] class: 0 | | | | |--- pdays > 98.50 | | | | | |--- month_aug <= 0.50 | | | | | | |--- job_admin. <= 0.50 | | | | | | | |--- month_may <= 0.50 | | | | | | | | |--- balance <= 849.50 | | | | | | | | | |--- weights: [5.10, 38.10] class: 1 | | | | | | | | |--- balance > 849.50 | | | | | | | | | |--- balance <= 3325.00 | | | | | | | | | | |--- month_feb <= 0.50 | | | | | | | | | | | |--- weights: [9.07, 0.00] class: 0 | | | | | | | | | | |--- month_feb > 0.50 | | | | | | | | | | | |--- weights: [1.13, 4.23] class: 1 | | | | | | | | | |--- balance > 3325.00 | | | | | | | | | | |--- weights: [0.57, 8.47] class: 1 | | | | | | | |--- month_may > 0.50 | | | | | | | | |--- weights: [6.24, 0.00] class: 0 | | | | | | |--- job_admin. 
> 0.50 | | | | | | | |--- weights: [8.50, 0.00] class: 0 | | | | | |--- month_aug > 0.50 | | | | | | |--- weights: [9.64, 0.00] class: 0 | | | |--- campaign > 3.50 | | | | |--- weights: [13.61, 0.00] class: 0 | | |--- duration > 132.50 | | | |--- job_entrepreneur <= 0.50 | | | | |--- pdays <= 202.50 | | | | | |--- balance <= -56.50 | | | | | | |--- weights: [0.57, 0.00] class: 0 | | | | | |--- balance > -56.50 | | | | | | |--- duration <= 162.50 | | | | | | | |--- duration <= 158.50 | | | | | | | | |--- weights: [14.74, 169.33] class: 1 | | | | | | | |--- duration > 158.50 | | | | | | | | |--- job_blue-collar <= 0.50 | | | | | | | | | |--- weights: [4.54, 0.00] class: 0 | | | | | | | | |--- job_blue-collar > 0.50 | | | | | | | | | |--- weights: [0.00, 8.47] class: 1 | | | | | | |--- duration > 162.50 | | | | | | | |--- weights: [15.31, 347.12] class: 1 | | | | |--- pdays > 202.50 | | | | | |--- pdays <= 277.00 | | | | | | |--- weights: [4.54, 0.00] class: 0 | | | | | |--- pdays > 277.00 | | | | | | |--- balance <= 3378.00 | | | | | | | |--- weights: [3.40, 42.33] class: 1 | | | | | | |--- balance > 3378.00 | | | | | | | |--- weights: [1.70, 0.00] class: 0 | | | |--- job_entrepreneur > 0.50 | | | | |--- weights: [1.13, 0.00] class: 0 |--- duration > 205.50 | |--- duration <= 473.50 | | |--- contact_unknown <= 0.50 | | | |--- poutcome_success <= 0.50 | | | | |--- housing_no <= 0.50 | | | | | |--- pdays <= 374.50 | | | | | | |--- month_jun <= 0.50 | | | | | | | |--- month_mar <= 0.50 | | | | | | | | |--- duration <= 389.50 | | | | | | | | | |--- month_sep <= 0.50 | | | | | | | | | | |--- month_oct <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 19 | | | | | | | | | | |--- month_oct > 0.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | |--- month_sep > 0.50 | | | | | | | | | | |--- weights: [4.54, 25.40] class: 1 | | | | | | | | |--- duration > 389.50 | | | | | | | | | |--- job_blue-collar <= 0.50 | | | | | | | | | | |--- 
month_jan <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 13 | | | | | | | | | | |--- month_jan > 0.50 | | | | | | | | | | | |--- weights: [9.64, 0.00] class: 0 | | | | | | | | | |--- job_blue-collar > 0.50 | | | | | | | | | | |--- balance <= 622.00 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | | | | |--- balance > 622.00 | | | | | | | | | | | |--- truncated branch of depth 4 | | | | | | | |--- month_mar > 0.50 | | | | | | | | |--- weights: [3.40, 59.26] class: 1 | | | | | | |--- month_jun > 0.50 | | | | | | | |--- weights: [10.77, 84.66] class: 1 | | | | | |--- pdays > 374.50 | | | | | | |--- weights: [4.54, 101.60] class: 1 | | | | |--- housing_no > 0.50 | | | | | |--- loan_yes <= 0.50 | | | | | | |--- month_aug <= 0.50 | | | | | | | |--- balance <= 50.50 | | | | | | | | |--- balance <= -46.00 | | | | | | | | | |--- weights: [32.32, 0.00] class: 0 | | | | | | | | |--- balance > -46.00 | | | | | | | | | |--- month_jul <= 0.50 | | | | | | | | | | |--- month_feb <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 9 | | | | | | | | | | |--- month_feb > 0.50 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | | | |--- month_jul > 0.50 | | | | | | | | | | |--- duration <= 410.00 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | | |--- duration > 410.00 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | |--- balance > 50.50 | | | | | | | | |--- job_blue-collar <= 0.50 | | | | | | | | | |--- month_jan <= 0.50 | | | | | | | | | | |--- month_nov <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 14 | | | | | | | | | | |--- month_nov > 0.50 | | | | | | | | | | | |--- truncated branch of depth 11 | | | | | | | | | |--- month_jan > 0.50 | | | | | | | | | | |--- day <= 25.50 | | | | | | | | | | | |--- weights: [3.40, 71.96] class: 1 | | | | | | | | | | |--- day > 25.50 | | | | | | | | | | | |--- truncated branch of depth 4 | | | | | | | | |--- job_blue-collar > 0.50 
| | | | | | | | | |--- month_sep <= 0.50 | | | | | | | | | | |--- pdays <= 142.50 | | | | | | | | | | | |--- truncated branch of depth 11 | | | | | | | | | | |--- pdays > 142.50 | | | | | | | | | | | |--- weights: [15.31, 0.00] class: 0 | | | | | | | | | |--- month_sep > 0.50 | | | | | | | | | | |--- weights: [1.13, 25.40] class: 1 | | | | | | |--- month_aug > 0.50 | | | | | | | |--- campaign <= 1.50 | | | | | | | | |--- job_technician <= 0.50 | | | | | | | | | |--- job_management <= 0.50 | | | | | | | | | | |--- job_blue-collar <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 4 | | | | | | | | | | |--- job_blue-collar > 0.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | |--- job_management > 0.50 | | | | | | | | | | |--- education_tertiary <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | | |--- education_tertiary > 0.50 | | | | | | | | | | | |--- truncated branch of depth 4 | | | | | | | | |--- job_technician > 0.50 | | | | | | | | | |--- balance <= 9642.50 | | | | | | | | | | |--- marital_divorced <= 0.50 | | | | | | | | | | | |--- weights: [13.61, 0.00] class: 0 | | | | | | | | | | |--- marital_divorced > 0.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | |--- balance > 9642.50 | | | | | | | | | | |--- weights: [0.00, 8.47] class: 1 | | | | | | | |--- campaign > 1.50 | | | | | | | | |--- duration <= 383.50 | | | | | | | | | |--- poutcome_unknown <= 0.50 | | | | | | | | | | |--- previous <= 1.50 | | | | | | | | | | | |--- weights: [2.27, 25.40] class: 1 | | | | | | | | | | |--- previous > 1.50 | | | | | | | | | | | |--- weights: [2.83, 0.00] class: 0 | | | | | | | | | |--- poutcome_unknown > 0.50 | | | | | | | | | | |--- age <= 59.50 | | | | | | | | | | | |--- truncated branch of depth 10 | | | | | | | | | | |--- age > 59.50 | | | | | | | | | | | |--- truncated branch of depth 5 | | | | | | | | |--- duration > 383.50 | | | | | | | | | |--- pdays <= 43.50 | | | | | | 
| | | | |--- marital_single <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 5 | | | | | | | | | | |--- marital_single > 0.50 | | | | | | | | | | | |--- truncated branch of depth 5 | | | | | | | | | |--- pdays > 43.50 | | | | | | | | | | |--- weights: [0.00, 16.93] class: 1 | | | | | |--- loan_yes > 0.50 | | | | | | |--- month_jul <= 0.50 | | | | | | | |--- day <= 1.50 | | | | | | | | |--- weights: [0.00, 16.93] class: 1 | | | | | | | |--- day > 1.50 | | | | | | | | |--- day <= 20.50 | | | | | | | | | |--- balance <= 360.50 | | | | | | | | | | |--- weights: [35.72, 0.00] class: 0 | | | | | | | | | |--- balance > 360.50 | | | | | | | | | | |--- balance <= 2094.00 | | | | | | | | | | | |--- truncated branch of depth 5 | | | | | | | | | | |--- balance > 2094.00 | | | | | | | | | | | |--- weights: [9.64, 0.00] class: 0 | | | | | | | | |--- day > 20.50 | | | | | | | | | |--- month_jan <= 0.50 | | | | | | | | | | |--- campaign <= 2.50 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | | | | |--- campaign > 2.50 | | | | | | | | | | | |--- weights: [5.67, 0.00] class: 0 | | | | | | | | | |--- month_jan > 0.50 | | | | | | | | | | |--- weights: [10.21, 0.00] class: 0 | | | | | | |--- month_jul > 0.50 | | | | | | | |--- day <= 3.00 | | | | | | | | |--- weights: [0.00, 8.47] class: 1 | | | | | | | |--- day > 3.00 | | | | | | | | |--- duration <= 425.00 | | | | | | | | | |--- age <= 31.50 | | | | | | | | | | |--- marital_divorced <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | | | | |--- marital_divorced > 0.50 | | | | | | | | | | | |--- weights: [0.57, 4.23] class: 1 | | | | | | | | | |--- age > 31.50 | | | | | | | | | | |--- weights: [102.62, 0.00] class: 0 | | | | | | | | |--- duration > 425.00 | | | | | | | | | |--- duration <= 431.50 | | | | | | | | | | |--- job_blue-collar <= 0.50 | | | | | | | | | | | |--- weights: [0.57, 8.47] class: 1 | | | | | | | | | | |--- job_blue-collar > 0.50 | | | | | | | | | | | |--- 
weights: [1.70, 0.00] class: 0 | | | | | | | | | |--- duration > 431.50 | | | | | | | | | | |--- weights: [11.34, 0.00] class: 0 | | | |--- poutcome_success > 0.50 | | | | |--- pdays <= 29.50 | | | | | |--- age <= 59.00 | | | | | | |--- weights: [3.40, 0.00] class: 0 | | | | | |--- age > 59.00 | | | | | | |--- weights: [0.00, 4.23] class: 1 | | | | |--- pdays > 29.50 | | | | | |--- housing_yes <= 0.50 | | | | | | |--- weights: [39.12, 1164.12] class: 1 | | | | | |--- housing_yes > 0.50 | | | | | | |--- month_may <= 0.50 | | | | | | | |--- duration <= 225.50 | | | | | | | | |--- day <= 15.00 | | | | | | | | | |--- weights: [1.13, 16.93] class: 1 | | | | | | | | |--- day > 15.00 | | | | | | | | | |--- weights: [3.97, 0.00] class: 0 | | | | | | | |--- duration > 225.50 | | | | | | | | |--- weights: [11.34, 283.62] class: 1 | | | | | | |--- month_may > 0.50 | | | | | | | |--- pdays <= 352.00 | | | | | | | | |--- age <= 38.50 | | | | | | | | | |--- balance <= 544.50 | | | | | | | | | | |--- weights: [2.27, 12.70] class: 1 | | | | | | | | | |--- balance > 544.50 | | | | | | | | | | |--- weights: [6.24, 0.00] class: 0 | | | | | | | | |--- age > 38.50 | | | | | | | | | |--- weights: [1.13, 42.33] class: 1 | | | | | | | |--- pdays > 352.00 | | | | | | | | |--- weights: [3.40, 0.00] class: 0 | | |--- contact_unknown > 0.50 | | | |--- month_oct <= 0.50 | | | | |--- duration <= 368.50 | | | | | |--- month_nov <= 0.50 | | | | | | |--- pdays <= 37.00 | | | | | | | |--- month_feb <= 0.50 | | | | | | | | |--- age <= 26.50 | | | | | | | | | |--- day <= 29.50 | | | | | | | | | | |--- housing_no <= 0.50 | | | | | | | | | | | |--- weights: [48.19, 0.00] class: 0 | | | | | | | | | | |--- housing_no > 0.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | |--- day > 29.50 | | | | | | | | | | |--- education_tertiary <= 0.50 | | | | | | | | | | | |--- weights: [1.70, 0.00] class: 0 | | | | | | | | | | |--- education_tertiary > 0.50 | | | | | | | | | | | |--- 
weights: [0.00, 4.23] class: 1 | | | | | | | | |--- age > 26.50 | | | | | | | | | |--- day <= 7.50 | | | | | | | | | | |--- duration <= 208.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | | |--- duration > 208.50 | | | | | | | | | | | |--- truncated branch of depth 5 | | | | | | | | | |--- day > 7.50 | | | | | | | | | | |--- weights: [856.12, 0.00] class: 0 | | | | | | | |--- month_feb > 0.50 | | | | | | | | |--- weights: [0.00, 4.23] class: 1 | | | | | | |--- pdays > 37.00 | | | | | | | |--- weights: [0.00, 4.23] class: 1 | | | | | |--- month_nov > 0.50 | | | | | | |--- day <= 15.00 | | | | | | | |--- weights: [0.00, 12.70] class: 1 | | | | | | |--- day > 15.00 | | | | | | | |--- weights: [3.97, 0.00] class: 0 | | | | |--- duration > 368.50 | | | | | |--- month_may <= 0.50 | | | | | | |--- balance <= -356.50 | | | | | | | |--- campaign <= 3.00 | | | | | | | | |--- weights: [1.70, 0.00] class: 0 | | | | | | | |--- campaign > 3.00 | | | | | | | | |--- weights: [0.00, 12.70] class: 1 | | | | | | |--- balance > -356.50 | | | | | | | |--- age <= 38.50 | | | | | | | | |--- balance <= 290.00 | | | | | | | | | |--- weights: [17.01, 0.00] class: 0 | | | | | | | | |--- balance > 290.00 | | | | | | | | | |--- balance <= 3027.00 | | | | | | | | | | |--- day <= 4.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | | |--- day > 4.50 | | | | | | | | | | | |--- truncated branch of depth 7 | | | | | | | | | |--- balance > 3027.00 | | | | | | | | | | |--- weights: [7.37, 0.00] class: 0 | | | | | | | |--- age > 38.50 | | | | | | | | |--- balance <= 16.00 | | | | | | | | | |--- age <= 49.50 | | | | | | | | | | |--- weights: [8.50, 0.00] class: 0 | | | | | | | | | |--- age > 49.50 | | | | | | | | | | |--- duration <= 426.50 | | | | | | | | | | | |--- weights: [1.70, 8.47] class: 1 | | | | | | | | | | |--- duration > 426.50 | | | | | | | | | | | |--- weights: [2.83, 0.00] class: 0 | | | | | | | | |--- balance > 16.00 | | | | | | | | | 
|--- weights: [52.16, 0.00] class: 0 | | | | | |--- month_may > 0.50 | | | | | | |--- age <= 30.50 | | | | | | | |--- job_admin. <= 0.50 | | | | | | | | |--- job_entrepreneur <= 0.50 | | | | | | | | | |--- default_yes <= 0.50 | | | | | | | | | | |--- weights: [40.82, 0.00] class: 0 | | | | | | | | | |--- default_yes > 0.50 | | | | | | | | | | |--- weights: [0.57, 4.23] class: 1 | | | | | | | | |--- job_entrepreneur > 0.50 | | | | | | | | | |--- weights: [0.57, 4.23] class: 1 | | | | | | | |--- job_admin. > 0.50 | | | | | | | | |--- campaign <= 1.50 | | | | | | | | | |--- weights: [3.40, 0.00] class: 0 | | | | | | | | |--- campaign > 1.50 | | | | | | | | | |--- weights: [0.57, 8.47] class: 1 | | | | | | |--- age > 30.50 | | | | | | | |--- duration <= 430.50 | | | | | | | | |--- weights: [125.30, 0.00] class: 0 | | | | | | | |--- duration > 430.50 | | | | | | | | |--- duration <= 431.50 | | | | | | | | | |--- weights: [1.13, 4.23] class: 1 | | | | | | | | |--- duration > 431.50 | | | | | | | | | |--- job_blue-collar <= 0.50 | | | | | | | | | | |--- weights: [36.85, 0.00] class: 0 | | | | | | | | | |--- job_blue-collar > 0.50 | | | | | | | | | | |--- balance <= 3977.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | | |--- balance > 3977.50 | | | | | | | | | | | |--- weights: [0.57, 4.23] class: 1 | | | |--- month_oct > 0.50 | | | | |--- weights: [0.00, 25.40] class: 1 | |--- duration > 473.50 | | |--- duration <= 647.50 | | | |--- contact_unknown <= 0.50 | | | | |--- poutcome_success <= 0.50 | | | | | |--- month_jul <= 0.50 | | | | | | |--- month_jan <= 0.50 | | | | | | | |--- housing_yes <= 0.50 | | | | | | | | |--- month_nov <= 0.50 | | | | | | | | | |--- balance <= 12579.50 | | | | | | | | | | |--- month_aug <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 10 | | | | | | | | | | |--- month_aug > 0.50 | | | | | | | | | | | |--- truncated branch of depth 8 | | | | | | | | | |--- balance > 12579.50 | | | | | | | | | | |--- 
contact_telephone <= 0.50 | | | | | | | | | | | |--- weights: [5.67, 0.00] class: 0 | | | | | | | | | | |--- contact_telephone > 0.50 | | | | | | | | | | | |--- weights: [0.00, 4.23] class: 1 | | | | | | | | |--- month_nov > 0.50 | | | | | | | | | |--- duration <= 619.00 | | | | | | | | | | |--- day <= 20.50 | | | | | | | | | | | |--- truncated branch of depth 7 | | | | | | | | | | |--- day > 20.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | |--- duration > 619.00 | | | | | | | | | | |--- weights: [6.24, 0.00] class: 0 | | | | | | | |--- housing_yes > 0.50 | | | | | | | | |--- month_apr <= 0.50 | | | | | | | | | |--- campaign <= 5.50 | | | | | | | | | | |--- job_unemployed <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 8 | | | | | | | | | | |--- job_unemployed > 0.50 | | | | | | | | | | | |--- weights: [3.40, 0.00] class: 0 | | | | | | | | | |--- campaign > 5.50 | | | | | | | | | | |--- weights: [5.67, 0.00] class: 0 | | | | | | | | |--- month_apr > 0.50 | | | | | | | | | |--- day <= 24.50 | | | | | | | | | | |--- day <= 18.50 | | | | | | | | | | | |--- truncated branch of depth 6 | | | | | | | | | | |--- day > 18.50 | | | | | | | | | | | |--- weights: [11.91, 0.00] class: 0 | | | | | | | | | |--- day > 24.50 | | | | | | | | | | |--- weights: [2.83, 29.63] class: 1 | | | | | | |--- month_jan > 0.50 | | | | | | | |--- day <= 14.50 | | | | | | | | |--- weights: [0.57, 12.70] class: 1 | | | | | | | |--- day > 14.50 | | | | | | | | |--- duration <= 482.00 | | | | | | | | | |--- weights: [1.13, 8.47] class: 1 | | | | | | | | |--- duration > 482.00 | | | | | | | | | |--- age <= 30.50 | | | | | | | | | | |--- duration <= 530.00 | | | | | | | | | | | |--- weights: [2.83, 0.00] class: 0 | | | | | | | | | | |--- duration > 530.00 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | |--- age > 30.50 | | | | | | | | | | |--- job_services <= 0.50 | | | | | | | | | | | |--- weights: [23.81, 0.00] class: 0 | | | | | | 
| | | | |--- job_services > 0.50 | | | | | | | | | | | |--- weights: [0.57, 4.23] class: 1 | | | | | |--- month_jul > 0.50 | | | | | | |--- campaign <= 8.50 | | | | | | | |--- duration <= 530.50 | | | | | | | | |--- duration <= 496.50 | | | | | | | | | |--- loan_yes <= 0.50 | | | | | | | | | | |--- job_admin. <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | | | | |--- job_admin. > 0.50 | | | | | | | | | | | |--- weights: [2.27, 0.00] class: 0 | | | | | | | | | |--- loan_yes > 0.50 | | | | | | | | | | |--- weights: [5.67, 0.00] class: 0 | | | | | | | | |--- duration > 496.50 | | | | | | | | | |--- age <= 41.50 | | | | | | | | | | |--- age <= 29.50 | | | | | | | | | | | |--- weights: [4.54, 0.00] class: 0 | | | | | | | | | | |--- age > 29.50 | | | | | | | | | | | |--- truncated branch of depth 6 | | | | | | | | | |--- age > 41.50 | | | | | | | | | | |--- weights: [15.88, 0.00] class: 0 | | | | | | | |--- duration > 530.50 | | | | | | | | |--- job_housemaid <= 0.50 | | | | | | | | | |--- balance <= 1596.00 | | | | | | | | | | |--- balance <= 1112.00 | | | | | | | | | | | |--- truncated branch of depth 8 | | | | | | | | | | |--- balance > 1112.00 | | | | | | | | | | | |--- weights: [5.67, 0.00] class: 0 | | | | | | | | | |--- balance > 1596.00 | | | | | | | | | | |--- duration <= 540.00 | | | | | | | | | | | |--- weights: [1.70, 0.00] class: 0 | | | | | | | | | | |--- duration > 540.00 | | | | | | | | | | | |--- weights: [12.47, 76.20] class: 1 | | | | | | | | |--- job_housemaid > 0.50 | | | | | | | | | |--- weights: [2.27, 0.00] class: 0 | | | | | | |--- campaign > 8.50 | | | | | | | |--- weights: [6.80, 0.00] class: 0 | | | | |--- poutcome_success > 0.50 | | | | | |--- balance <= 13362.50 | | | | | | |--- month_dec <= 0.50 | | | | | | | |--- education_primary <= 0.50 | | | | | | | | |--- weights: [7.37, 292.09] class: 1 | | | | | | | |--- education_primary > 0.50 | | | | | | | | |--- age <= 55.50 | | | | | | | | | |--- weights: [3.40, 
0.00] class: 0 | | | | | | | | |--- age > 55.50 | | | | | | | | | |--- weights: [0.00, 12.70] class: 1 | | | | | | |--- month_dec > 0.50 | | | | | | | |--- weights: [0.57, 0.00] class: 0 | | | | | |--- balance > 13362.50 | | | | | | |--- weights: [1.13, 0.00] class: 0 | | | |--- contact_unknown > 0.50 | | | | |--- month_may <= 0.50 | | | | | |--- balance <= 2941.00 | | | | | | |--- job_entrepreneur <= 0.50 | | | | | | | |--- day <= 3.50 | | | | | | | | |--- campaign <= 1.50 | | | | | | | | | |--- weights: [5.67, 0.00] class: 0 | | | | | | | | |--- campaign > 1.50 | | | | | | | | | |--- duration <= 588.00 | | | | | | | | | | |--- duration <= 520.50 | | | | | | | | | | | |--- weights: [2.83, 0.00] class: 0 | | | | | | | | | | |--- duration > 520.50 | | | | | | | | | | | |--- weights: [2.27, 12.70] class: 1 | | | | | | | | | |--- duration > 588.00 | | | | | | | | | | |--- weights: [3.40, 0.00] class: 0 | | | | | | | |--- day > 3.50 | | | | | | | | |--- housing_no <= 0.50 | | | | | | | | | |--- balance <= -514.50 | | | | | | | | | | |--- weights: [1.70, 0.00] class: 0 | | | | | | | | | |--- balance > -514.50 | | | | | | | | | | |--- duration <= 595.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | | |--- duration > 595.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | |--- housing_no > 0.50 | | | | | | | | | |--- marital_divorced <= 0.50 | | | | | | | | | | |--- age <= 55.50 | | | | | | | | | | | |--- truncated branch of depth 5 | | | | | | | | | | |--- age > 55.50 | | | | | | | | | | | |--- weights: [2.83, 0.00] class: 0 | | | | | | | | | |--- marital_divorced > 0.50 | | | | | | | | | | |--- weights: [6.24, 0.00] class: 0 | | | | | | |--- job_entrepreneur > 0.50 | | | | | | | |--- weights: [5.10, 0.00] class: 0 | | | | | |--- balance > 2941.00 | | | | | | |--- job_admin. <= 0.50 | | | | | | | |--- weights: [15.88, 0.00] class: 0 | | | | | | |--- job_admin. 
> 0.50 | | | | | | | |--- weights: [0.57, 8.47] class: 1 | | | | |--- month_may > 0.50 | | | | | |--- duration <= 505.50 | | | | | | |--- education_secondary <= 0.50 | | | | | | | |--- balance <= 154.50 | | | | | | | | |--- job_management <= 0.50 | | | | | | | | | |--- weights: [1.70, 12.70] class: 1 | | | | | | | | |--- job_management > 0.50 | | | | | | | | | |--- weights: [1.70, 0.00] class: 0 | | | | | | | |--- balance > 154.50 | | | | | | | | |--- age <= 52.50 | | | | | | | | | |--- weights: [14.74, 0.00] class: 0 | | | | | | | | |--- age > 52.50 | | | | | | | | | |--- weights: [1.13, 4.23] class: 1 | | | | | | |--- education_secondary > 0.50 | | | | | | | |--- weights: [28.92, 0.00] class: 0 | | | | | |--- duration > 505.50 | | | | | | |--- job_management <= 0.50 | | | | | | | |--- balance <= 279.00 | | | | | | | | |--- job_services <= 0.50 | | | | | | | | | |--- job_retired <= 0.50 | | | | | | | | | | |--- loan_yes <= 0.50 | | | | | | | | | | | |--- weights: [40.82, 0.00] class: 0 | | | | | | | | | | |--- loan_yes > 0.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | |--- job_retired > 0.50 | | | | | | | | | | |--- weights: [1.13, 4.23] class: 1 | | | | | | | | |--- job_services > 0.50 | | | | | | | | | |--- marital_married <= 0.50 | | | | | | | | | | |--- age <= 25.50 | | | | | | | | | | | |--- weights: [1.70, 0.00] class: 0 | | | | | | | | | | |--- age > 25.50 | | | | | | | | | | | |--- weights: [0.57, 12.70] class: 1 | | | | | | | | | |--- marital_married > 0.50 | | | | | | | | | | |--- weights: [2.27, 0.00] class: 0 | | | | | | | |--- balance > 279.00 | | | | | | | | |--- balance <= 630.50 | | | | | | | | | |--- balance <= 549.00 | | | | | | | | | | |--- balance <= 346.00 | | | | | | | | | | | |--- weights: [6.24, 16.93] class: 1 | | | | | | | | | | |--- balance > 346.00 | | | | | | | | | | | |--- weights: [10.77, 0.00] class: 0 | | | | | | | | | |--- balance > 549.00 | | | | | | | | | | |--- weights: [0.57, 21.17] class: 1 | | | 
| | | | | |--- balance > 630.50 | | | | | | | | | |--- age <= 26.00 | | | | | | | | | | |--- weights: [0.00, 4.23] class: 1 | | | | | | | | | |--- age > 26.00 | | | | | | | | | | |--- duration <= 589.00 | | | | | | | | | | | |--- truncated branch of depth 7 | | | | | | | | | | |--- duration > 589.00 | | | | | | | | | | | |--- weights: [10.77, 0.00] class: 0 | | | | | | |--- job_management > 0.50 | | | | | | | |--- age <= 45.50 | | | | | | | | |--- campaign <= 3.50 | | | | | | | | | |--- balance <= 831.50 | | | | | | | | | | |--- age <= 31.50 | | | | | | | | | | | |--- weights: [0.00, 8.47] class: 1 | | | | | | | | | | |--- age > 31.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | |--- balance > 831.50 | | | | | | | | | | |--- weights: [2.27, 0.00] class: 0 | | | | | | | | |--- campaign > 3.50 | | | | | | | | | |--- weights: [0.00, 12.70] class: 1 | | | | | | | |--- age > 45.50 | | | | | | | | |--- weights: [3.97, 0.00] class: 0 | | |--- duration > 647.50 | | | |--- contact_unknown <= 0.50 | | | | |--- duration <= 847.50 | | | | | |--- poutcome_success <= 0.50 | | | | | | |--- month_apr <= 0.50 | | | | | | | |--- balance <= -962.50 | | | | | | | | |--- weights: [1.70, 0.00] class: 0 | | | | | | | |--- balance > -962.50 | | | | | | | | |--- month_nov <= 0.50 | | | | | | | | | |--- balance <= 653.00 | | | | | | | | | | |--- contact_cellular <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 4 | | | | | | | | | | |--- contact_cellular > 0.50 | | | | | | | | | | | |--- truncated branch of depth 17 | | | | | | | | | |--- balance > 653.00 | | | | | | | | | | |--- job_unemployed <= 0.50 | | | | | | | | | | | |--- weights: [49.33, 584.17] class: 1 | | | | | | | | | | |--- job_unemployed > 0.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | |--- month_nov > 0.50 | | | | | | | | | |--- pdays <= 161.50 | | | | | | | | | | |--- contact_telephone <= 0.50 | | | | | | | | | | | |--- weights: [24.95, 131.23] class: 1 | | 
| | | | | | | | |--- contact_telephone > 0.50 | | | | | | | | | | | |--- weights: [3.40, 0.00] class: 0 | | | | | | | | | |--- pdays > 161.50 | | | | | | | | | | |--- weights: [4.54, 0.00] class: 0 | | | | | | |--- month_apr > 0.50 | | | | | | | |--- day <= 20.50 | | | | | | | | |--- duration <= 666.50 | | | | | | | | | |--- weights: [5.10, 0.00] class: 0 | | | | | | | | |--- duration > 666.50 | | | | | | | | | |--- duration <= 722.50 | | | | | | | | | | |--- balance <= 1178.00 | | | | | | | | | | | |--- weights: [4.54, 33.87] class: 1 | | | | | | | | | | |--- balance > 1178.00 | | | | | | | | | | | |--- weights: [2.83, 0.00] class: 0 | | | | | | | | | |--- duration > 722.50 | | | | | | | | | | |--- housing_no <= 0.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | | |--- housing_no > 0.50 | | | | | | | | | | | |--- weights: [1.13, 12.70] class: 1 | | | | | | | |--- day > 20.50 | | | | | | | | |--- weights: [1.13, 55.03] class: 1 | | | | | |--- poutcome_success > 0.50 | | | | | | |--- weights: [4.54, 173.56] class: 1 | | | | |--- duration > 847.50 | | | | | |--- weights: [180.86, 2290.13] class: 1 | | | |--- contact_unknown > 0.50 | | | | |--- duration <= 827.50 | | | | | |--- duration <= 815.50 | | | | | | |--- age <= 32.50 | | | | | | | |--- loan_yes <= 0.50 | | | | | | | | |--- balance <= 81.00 | | | | | | | | | |--- balance <= -0.50 | | | | | | | | | | |--- weights: [2.83, 21.17] class: 1 | | | | | | | | | |--- balance > -0.50 | | | | | | | | | | |--- weights: [4.54, 0.00] class: 0 | | | | | | | | |--- balance > 81.00 | | | | | | | | | |--- duration <= 701.50 | | | | | | | | | | |--- age <= 31.50 | | | | | | | | | | | |--- weights: [3.40, 25.40] class: 1 | | | | | | | | | | |--- age > 31.50 | | | | | | | | | | | |--- weights: [2.83, 0.00] class: 0 | | | | | | | | | |--- duration > 701.50 | | | | | | | | | | |--- weights: [3.40, 84.66] class: 1 | | | | | | | |--- loan_yes > 0.50 | | | | | | | | |--- job_admin. 
<= 0.50 | | | | | | | | | |--- weights: [6.24, 0.00] class: 0 | | | | | | | | |--- job_admin. > 0.50 | | | | | | | | | |--- weights: [0.00, 4.23] class: 1 | | | | | | |--- age > 32.50 | | | | | | | |--- job_technician <= 0.50 | | | | | | | | |--- duration <= 687.00 | | | | | | | | | |--- marital_married <= 0.50 | | | | | | | | | | |--- duration <= 651.50 | | | | | | | | | | | |--- weights: [0.00, 4.23] class: 1 | | | | | | | | | | |--- duration > 651.50 | | | | | | | | | | | |--- weights: [7.94, 0.00] class: 0 | | | | | | | | | |--- marital_married > 0.50 | | | | | | | | | | |--- balance <= 486.50 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | | | | |--- balance > 486.50 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | |--- duration > 687.00 | | | | | | | | | |--- marital_married <= 0.50 | | | | | | | | | | |--- duration <= 731.00 | | | | | | | | | | | |--- truncated branch of depth 2 | | | | | | | | | | |--- duration > 731.00 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | | | |--- marital_married > 0.50 | | | | | | | | | | |--- age <= 44.50 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | | | | |--- age > 44.50 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | |--- job_technician > 0.50 | | | | | | | | |--- day <= 5.50 | | | | | | | | | |--- weights: [3.40, 0.00] class: 0 | | | | | | | | |--- day > 5.50 | | | | | | | | | |--- education_secondary <= 0.50 | | | | | | | | | | |--- weights: [1.13, 0.00] class: 0 | | | | | | | | | |--- education_secondary > 0.50 | | | | | | | | | | |--- duration <= 747.00 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | | | | | |--- duration > 747.00 | | | | | | | | | | | |--- weights: [0.57, 33.87] class: 1 | | | | | |--- duration > 815.50 | | | | | | |--- weights: [7.94, 0.00] class: 0 | | | | |--- duration > 827.50 | | | | | |--- campaign <= 11.50 | | | | | | |--- marital_divorced <= 0.50 | | | | | | | |--- 
balance <= 790.50 | | | | | | | | |--- duration <= 834.50 | | | | | | | | | |--- weights: [2.27, 0.00] class: 0 | | | | | | | | |--- duration > 834.50 | | | | | | | | | |--- age <= 30.50 | | | | | | | | | | |--- job_technician <= 0.50 | | | | | | | | | | | |--- weights: [4.54, 97.36] class: 1 | | | | | | | | | | |--- job_technician > 0.50 | | | | | | | | | | | |--- weights: [1.70, 0.00] class: 0 | | | | | | | | | |--- age > 30.50 | | | | | | | | | | |--- duration <= 1870.00 | | | | | | | | | | | |--- truncated branch of depth 12 | | | | | | | | | | |--- duration > 1870.00 | | | | | | | | | | | |--- weights: [4.54, 0.00] class: 0 | | | | | | | |--- balance > 790.50 | | | | | | | | |--- age <= 26.50 | | | | | | | | | |--- weights: [2.27, 0.00] class: 0 | | | | | | | | |--- age > 26.50 | | | | | | | | | |--- day <= 2.50 | | | | | | | | | | |--- weights: [1.13, 0.00] class: 0 | | | | | | | | | |--- day > 2.50 | | | | | | | | | | |--- balance <= 6327.50 | | | | | | | | | | | |--- weights: [20.98, 279.39] class: 1 | | | | | | | | | | |--- balance > 6327.50 | | | | | | | | | | | |--- truncated branch of depth 3 | | | | | | |--- marital_divorced > 0.50 | | | | | | | |--- weights: [6.24, 135.46] class: 1 | | | | | |--- campaign > 11.50 | | | | | | |--- weights: [2.83, 0.00] class: 0
Plotting the feature importance of each variable
# Sort best_model's feature importances in ascending order; with a horizontal
# bar chart this puts the most important feature in the top bar.
importances = best_model.feature_importances_
indices = np.argsort(importances)
bar_positions = range(len(indices))
ranked_labels = [feature_names[i] for i in indices]

plt.figure(figsize=(12, 12))
plt.barh(bar_positions, importances[indices], color="Blue", align="center")
plt.yticks(bar_positions, ranked_labels)
plt.title("Feature Importances")
plt.xlabel("Relative Importance")
plt.show()
Observations from tree
The post-pruned tree is considerably more complex than the pre-pruned tree. The feature importances are largely the same.
# training performance comparison
# Column label -> training-set metrics frame of the corresponding model.
train_perf_by_model = {
    "Decision Tree sklearn": decision_tree_perf_train,
    "Decision Tree (Pre-Pruning)": decision_tree_tune_perf_train,
    "Decision Tree (Post-Pruning)": decision_tree_post_perf_train,
}
# Transpose every frame and put them side by side, then label the columns.
models_train_comp_df = pd.concat(
    [perf.T for perf in train_perf_by_model.values()], axis=1
)
models_train_comp_df.columns = list(train_perf_by_model)
print("Training performance comparison:")
models_train_comp_df
Training performance comparison:
| | Decision Tree sklearn | Decision Tree (Pre-Pruning) | Decision Tree (Post-Pruning) |
|---|---|---|---|
| Accuracy | 1.00000 | 0.82513 | 0.93127 |
| Recall | 1.00000 | 0.82772 | 0.99866 |
| Precision | 1.00000 | 0.38753 | 0.63239 |
| F1 | 1.00000 | 0.52790 | 0.77440 |
# testing performance comparison
# Column label -> test-set metrics frame of the corresponding model.
test_perf_by_model = {
    "Decision Tree sklearn": decision_tree_perf_test,
    "Decision Tree (Pre-Pruning)": decision_tree_tune_perf_test,
    "Decision Tree (Post-Pruning)": decision_tree_post_test,
}
# Transpose every frame and put them side by side, then label the columns.
models_test_comp_df = pd.concat(
    [perf.T for perf in test_perf_by_model.values()], axis=1
)
models_test_comp_df.columns = list(test_perf_by_model)
print("Test set performance comparison:")
models_test_comp_df
Test set performance comparison:
| | Decision Tree sklearn | Decision Tree (Pre-Pruning) | Decision Tree (Post-Pruning) |
|---|---|---|---|
| Accuracy | 0.87651 | 0.82159 | 0.86597 |
| Recall | 0.49323 | 0.81560 | 0.73050 |
| Precision | 0.46252 | 0.37217 | 0.44730 |
| F1 | 0.47738 | 0.51111 | 0.55485 |
Observations
The decision tree with default parameters overfits the training data and does not generalize well. The pre-pruned tree generalizes well (its training and test scores are close) and has the highest test recall, although its precision is low. The post-pruned tree has the highest test F1 score of the three models, but the difference between its precision and recall is high, and its training scores are well above its test scores. The bank will be able to maintain a balance between resources, brand equity, and candidates who will deposit by using the pre-pruned decision tree model.
# Splitting the data into train and test sets in 70:30 ratio
# stratify=y keeps the class proportions of y the same in both partitions.
# random_state pins the shuffle so the split, and every metric computed from
# it, is reproducible from run to run.
# NOTE(review): if the decision-tree models above were fitted on a different
# split, their test metrics are not strictly comparable with models evaluated
# on this one — confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, stratify=y, random_state=1
)
X_train.shape, X_test.shape
((31647, 51), (13564, 51))
Over Sampling Using SMOTE
# Oversample the training partition only, then fit the random forest on the
# resampled data; X_test / y_test are not passed to the sampler.
# The sampler is bound to `smote` instead of `sm` so that it does not overwrite
# the `sm` alias of statsmodels.api imported at the top of the notebook.
# NOTE(review): SMOTE is presumably imported from imblearn.over_sampling in an
# earlier cell — confirm.
smote = SMOTE(random_state=12)
X_train_r, y_train_r = smote.fit_resample(X_train, y_train)
clf_rf = RandomForestClassifier(n_estimators=110, random_state=42)
clf_rf.fit(X_train_r, y_train_r)
RandomForestClassifier(n_estimators=110, random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestClassifier(n_estimators=110, random_state=42)
# Evaluate the random forest on the resampled training data with the two
# notebook helpers: the confusion-matrix helper first, then the metric summary
# (accuracy, recall, precision, F1).
rf_train_inputs = (clf_rf, X_train_r, y_train_r)
confusion_matrix_sklearn(*rf_train_inputs)
rf_perf_train = model_performance_classification_sklearn(*rf_train_inputs)
rf_perf_train
| | Accuracy | Recall | Precision | F1 |
|---|---|---|---|---|
| 0 | 1.00000 | 1.00000 | 1.00000 | 1.00000 |
The model is overfitting the training dataset.
# Evaluate the random forest on the untouched test partition with the same two
# notebook helpers: confusion-matrix helper, then the metric summary.
rf_test_inputs = (clf_rf, X_test, y_test)
confusion_matrix_sklearn(*rf_test_inputs)
rf_perf_test = model_performance_classification_sklearn(*rf_test_inputs)
rf_perf_test
| | Accuracy | Recall | Precision | F1 |
|---|---|---|---|---|
| 0 | 0.90556 | 0.43163 | 0.64380 | 0.51679 |
On the test set, the model's accuracy (0.906) is high compared with its precision (0.644) and recall (0.432).
Plotting the feature importance of each variable
# Pick the entry of clfs at the position of the largest value in f1_test and
# print its configuration.
# NOTE(review): presumably f1_test holds the test-set F1 score of each
# candidate in clfs, in matching order — confirm against the cell that builds them.
index_best_model = np.asarray(f1_test).argmax()
best_model = clfs[index_best_model]
print(best_model)
DecisionTreeClassifier(ccp_alpha=7.258901846204399e-05, class_weight='balanced',
random_state=1)
# Feature importances of the random forest.
# Read them from clf_rf, not best_model: best_model is the post-pruned decision
# tree selected from clfs, so its importances would not describe the forest.
importances = clf_rf.feature_importances_
indices = np.argsort(importances)
# Take the labels from the frame the forest's training data was derived from,
# so that they line up with the importance vector.
# NOTE(review): assumes X_train is a pandas DataFrame — confirm.
rf_feature_names = list(X_train.columns)
plt.figure(figsize=(12, 12))
plt.title("Feature Importances")
plt.barh(range(len(indices)), importances[indices], color="Blue", align="center")
plt.yticks(range(len(indices)), [rf_feature_names[i] for i in indices])
plt.xlabel("Relative Importance")
plt.show()
Even in the random forest model, duration and poutcome are the most important features.
def _side_by_side(perf_by_model):
    """Transpose each metrics frame, join them column-wise and label the columns."""
    combined = pd.concat([perf.T for perf in perf_by_model.values()], axis=1)
    combined.columns = list(perf_by_model)
    return combined


# training performance comparison
models_train_comp_df = _side_by_side(
    {
        "Decision Tree sklearn": decision_tree_perf_train,
        "Decision Tree (Pre-Pruning)": decision_tree_tune_perf_train,
        "Decision Tree (Post-Pruning)": decision_tree_post_perf_train,
        "Random Forest (resampled)": rf_perf_train,
    }
)
# test set performance comparison
models_test_comp_df = _side_by_side(
    {
        "Decision Tree sklearn": decision_tree_perf_test,
        "Decision Tree (Pre-Pruning)": decision_tree_tune_perf_test,
        "Decision Tree (Post-Pruning)": decision_tree_post_test,
        "Random Forest (resampled)": rf_perf_test,
    }
)
# Show the training comparison with one row per model.
models_train_comp_df.T
| | Accuracy | Recall | Precision | F1 |
|---|---|---|---|---|
| Decision Tree sklearn | 1.00000 | 1.00000 | 1.00000 | 1.00000 |
| Decision Tree (Pre-Pruning) | 0.82513 | 0.82772 | 0.38753 | 0.52790 |
| Decision Tree (Post-Pruning) | 0.93127 | 0.99866 | 0.63239 | 0.77440 |
| Random Forest (resampled) | 1.00000 | 1.00000 | 1.00000 | 1.00000 |
# One line chart per metric, comparing the models on the training set.
# The generated snippets read from `_df_10`, `_df_9`, `_df_8` and `_df_7`,
# which this notebook's code never assigns (they look like Colab auto-chart
# temporaries), so the cells fail with NameError when re-run. The data is read
# from the comparison table instead.
train_metrics_by_model = models_train_comp_df.T
for metric in ("F1", "Precision", "Recall", "Accuracy"):
    # A dedicated figure per metric keeps the charts from being drawn on top
    # of each other when they run in a single cell.
    fig, ax = plt.subplots(figsize=(10, 4))
    train_metrics_by_model[metric].plot(
        kind="line", color="#0000FF", title=metric, ax=ax
    )
    ax.spines[["top", "right"]].set_visible(False)
# reset the plot configurations to default
plt.rcdefaults()
plt.show()
# Display the test-set comparison with one row per model and one column per metric.
models_test_comp_df.T
| | Accuracy | Recall | Precision | F1 |
|---|---|---|---|---|
| Decision Tree sklearn | 0.87651 | 0.49323 | 0.46252 | 0.47738 |
| Decision Tree (Pre-Pruning) | 0.82159 | 0.81560 | 0.37217 | 0.51111 |
| Decision Tree (Post-Pruning) | 0.86597 | 0.73050 | 0.44730 | 0.55485 |
| Random Forest (resampled) | 0.90556 | 0.43163 | 0.64380 | 0.51679 |
# One line chart per metric, comparing the models on the test set.
# The generated snippets read from `_df_21`, `_df_20`, `_df_19` and `_df_18`,
# which this notebook's code never assigns (they look like Colab auto-chart
# temporaries), so the cells fail with NameError when re-run. The data is read
# from the comparison table instead.
test_metrics_by_model = models_test_comp_df.T
for metric in ("F1", "Precision", "Recall", "Accuracy"):
    # A dedicated figure per metric keeps the charts from being drawn on top
    # of each other when they run in a single cell.
    fig, ax = plt.subplots(figsize=(10, 4))
    test_metrics_by_model[metric].plot(kind="line", title=metric, ax=ax)
    ax.spines[["top", "right"]].set_visible(False)
On the test dataset, the random forest has a higher accuracy than the default sklearn decision tree (0.906 vs. 0.877).
Looking back at the exploratory data analysis, the data are highly imbalanced. Duration remains the most important feature. We also think that candidates who were contacted during the previous campaign are the most likely to subscribe (make a deposit) when contacted during forthcoming campaigns.
Married clients participated the most in the campaign, followed by single and then divorced clients. This suggests that married people care more about savings, deposits, etc. than the others.
In terms of job categories, unknown, technicians, managers, self-employed, and housemaids participated the most in the campaign.
Students and retired clients did not participate as much as the others, maybe because they are less likely to have a regular income.
Random forest (resampled) seems to be the better model, with the highest accuracy on the test dataset. We think its better performance may be due to the fact that the training data were balanced using SMOTE (Synthetic Minority Oversampling TEchnique).